diff --git a/swh/web/api/apiresponse.py b/swh/web/api/apiresponse.py index 551346bb..45a93e22 100644 --- a/swh/web/api/apiresponse.py +++ b/swh/web/api/apiresponse.py @@ -1,193 +1,186 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import json import traceback from django.utils.html import escape from rest_framework.response import Response from swh.storage.exc import StorageDBError, StorageAPIError from swh.web.api import utils from swh.web.common.exc import NotFoundExc, ForbiddenExc, BadInputExc from swh.web.common.utils import shorten_path, gen_path_info from swh.web.config import get_config -def compute_link_header(request, rv, options): +def compute_link_header(rv, options): """Add Link header in returned value results. Args: request: a DRF Request object rv (dict): dictionary with keys: - headers: potential headers with 'link-next' and 'link-prev' keys - results: containing the result to return options (dict): the initial dict to update with result if any Returns: dict: dictionary with optional keys 'link-next' and 'link-prev' """ link_headers = [] if 'headers' not in rv: return {} rv_headers = rv['headers'] if 'link-next' in rv_headers: - link_headers.append('<%s>; rel="next"' % ( - request.build_absolute_uri(rv_headers['link-next']))) + link_headers.append('<%s>; rel="next"' % rv_headers['link-next']) if 'link-prev' in rv_headers: - link_headers.append('<%s>; rel="previous"' % ( - request.build_absolute_uri(rv_headers['link-prev']))) + link_headers.append('<%s>; rel="previous"' % rv_headers['link-prev']) if link_headers: link_header_str = ','.join(link_headers) headers = options.get('headers', {}) headers.update({ 'Link': link_header_str }) return headers return {} def filter_by_fields(request, data): """Extract a request parameter 'fields' if it 
exists to permit the filtering on the data dict's keys. If such field is not provided, returns the data as is. """ fields = request.query_params.get('fields') if fields: fields = set(fields.split(',')) data = utils.filter_field_keys(data, fields) return data def transform(rv): """Transform an eventual returned value with multiple layer of information with only what's necessary. If the returned value rv contains the 'results' key, this is the associated value which is returned. Otherwise, return the initial dict without the potential 'headers' key. """ if 'results' in rv: return rv['results'] if 'headers' in rv: rv.pop('headers') return rv def make_api_response(request, data, doc_data={}, options={}): """Generates an API response based on the requested mimetype. Args: request: a DRF Request object data: raw data to return in the API response doc_data: documentation data for HTML response options: optional data that can be used to generate the response Returns: a DRF Response a object """ if data: - options['headers'] = compute_link_header(request, data, options) + options['headers'] = compute_link_header(data, options) data = transform(data) data = filter_by_fields(request, data) doc_env = doc_data headers = {} if 'headers' in options: doc_env['headers_data'] = options['headers'] headers = options['headers'] # get request status code doc_env['status_code'] = options.get('status', 200) response_args = {'status': doc_env['status_code'], 'headers': headers, 'content_type': request.accepted_media_type} # when requesting HTML, typically when browsing the API through its # documented views, we need to enrich the input data with documentation # related ones and inform DRF that we request HTML template rendering if request.accepted_media_type == 'text/html': if data: data = json.dumps(data, sort_keys=True, indent=4, separators=(',', ': ')) doc_env['response_data'] = data - doc_env['request'] = { - 'path': request.path, - 'method': request.method, - 'absolute_uri': 
request.build_absolute_uri(), - } doc_env['heading'] = shorten_path(str(request.path)) if 'route' in doc_env: doc_env['endpoint_path'] = gen_path_info(doc_env['route']) response_args['data'] = doc_env response_args['template_name'] = 'api/apidoc.html' # otherwise simply return the raw data and let DRF picks # the correct renderer (JSON or YAML) else: response_args['data'] = data return Response(**response_args) def error_response(request, error, doc_data): """Private function to create a custom error response. Args: request: a DRF Request object error: the exception that caused the error doc_data: documentation data for HTML response """ error_code = 500 if isinstance(error, BadInputExc): error_code = 400 elif isinstance(error, NotFoundExc): error_code = 404 elif isinstance(error, ForbiddenExc): error_code = 403 elif isinstance(error, StorageDBError): error_code = 503 elif isinstance(error, StorageAPIError): error_code = 503 error_opts = {'status': error_code} error_data = { 'exception': error.__class__.__name__, 'reason': str(error), } if request.accepted_media_type == 'text/html': error_data['reason'] = escape(error_data['reason']) if get_config()['debug']: error_data['traceback'] = traceback.format_exc() return make_api_response(request, error_data, doc_data, options=error_opts) diff --git a/swh/web/api/utils.py b/swh/web/api/utils.py index 011e3ac0..8c1009ed 100644 --- a/swh/web/api/utils.py +++ b/swh/web/api/utils.py @@ -1,196 +1,324 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information +from typing import Dict, Optional, Any -from swh.web.common.utils import reverse +from django.http import HttpRequest + +from swh.web.common.utils import reverse, resolve_branch_alias from swh.web.common.query import parse_hash def filter_field_keys(data, field_keys): 
"""Given an object instance (directory or list), and a csv field keys to filter on. Return the object instance with filtered keys. Note: Returns obj as is if it's an instance of types not in (dictionary, list) Args: - data: one object (dictionary, list...) to filter. - field_keys: csv or set of keys to filter the object on Returns: obj filtered on field_keys """ if isinstance(data, map): return map(lambda x: filter_field_keys(x, field_keys), data) if isinstance(data, list): return [filter_field_keys(x, field_keys) for x in data] if isinstance(data, dict): return {k: v for (k, v) in data.items() if k in field_keys} return data def person_to_string(person): """Map a person (person, committer, tagger, etc...) to a string. """ return ''.join([person['name'], ' <', person['email'], '>']) -def enrich_object(object): +def enrich_object(object: Dict[str, str], + request: Optional[HttpRequest] = None) -> Dict[str, str]: """Enrich an object (revision, release) with link to the 'target' of type 'target_type'. Args: object: An object with target and target_type keys (e.g. 
release, revision) + request: Absolute URIs will be generated if provided Returns: - Object enriched with target_url pointing to the right - swh.web.ui.api urls for the pointing object (revision, - release, content, directory) + Object enriched with target object url (revision, release, content, + directory) """ - obj = object.copy() - if 'target' in obj and 'target_type' in obj: - if obj['target_type'] in ('revision', 'release', 'directory'): - obj['target_url'] = \ - reverse('api-1-%s' % obj['target_type'], - url_args={'sha1_git': obj['target']}) - elif obj['target_type'] == 'content': - obj['target_url'] = \ - reverse('api-1-content', - url_args={'q': 'sha1_git:' + obj['target']}) - elif obj['target_type'] == 'snapshot': - obj['target_url'] = \ - reverse('api-1-snapshot', - url_args={'snapshot_id': obj['target']}) - - return obj + if 'target' in object and 'target_type' in object: + if object['target_type'] in ('revision', 'release', 'directory'): + object['target_url'] = reverse( + 'api-1-%s' % object['target_type'], + url_args={'sha1_git': object['target']}, + request=request) + elif object['target_type'] == 'content': + object['target_url'] = reverse( + 'api-1-content', + url_args={'q': 'sha1_git:' + object['target']}, + request=request) + elif object['target_type'] == 'snapshot': + object['target_url'] = reverse( + 'api-1-snapshot', + url_args={'snapshot_id': object['target']}, + request=request) + + return object enrich_release = enrich_object -def enrich_directory(directory, context_url=None): +def enrich_directory(directory: Dict[str, str], + request: Optional[HttpRequest] = None) -> Dict[str, str]: """Enrich directory with url to content or directory. 
+ Args: + directory: dict of data associated to a swh directory object + request: Absolute URIs will be generated if provided + + Returns: + An enriched directory dict filled with additional urls """ if 'type' in directory: target_type = directory['type'] target = directory['target'] if target_type == 'file': directory['target_url'] = reverse( - 'api-1-content', url_args={'q': 'sha1_git:%s' % target}) - if context_url: - directory['file_url'] = context_url + directory['name'] + '/' + 'api-1-content', + url_args={'q': 'sha1_git:%s' % target}, + request=request) elif target_type == 'dir': directory['target_url'] = reverse( - 'api-1-directory', url_args={'sha1_git': target}) - if context_url: - directory['dir_url'] = context_url + directory['name'] + '/' + 'api-1-directory', + url_args={'sha1_git': target}, + request=request) else: directory['target_url'] = reverse( - 'api-1-revision', url_args={'sha1_git': target}) - if context_url: - directory['rev_url'] = context_url + directory['name'] + '/' + 'api-1-revision', + url_args={'sha1_git': target}, + request=request) return directory -def enrich_metadata_endpoint(content): - """Enrich metadata endpoint with link to the upper metadata endpoint. +def enrich_metadata_endpoint(content_metadata: Dict[str, str], + request: Optional[HttpRequest] = None + ) -> Dict[str, str]: + """Enrich content metadata dict with link to the upper metadata endpoint. 
+ Args: + content_metadata: dict of data associated to a swh content metadata + request: Absolute URIs will be generated if provided + + Returns: + An enriched content metadata dict filled with an additional url """ - c = content.copy() + c = content_metadata c['content_url'] = reverse('api-1-content', - url_args={'q': 'sha1:%s' % c['id']}) + url_args={'q': 'sha1:%s' % c['id']}, + request=request) return c -def enrich_content(content, top_url=False, query_string=None): +def enrich_content(content: Dict[str, Any], + top_url: Optional[bool] = False, + query_string: Optional[str] = None, + request: Optional[HttpRequest] = None) -> Dict[str, str]: """Enrich content with links to: - data_url: its raw data - filetype_url: its filetype information - language_url: its programming language information - license_url: its licensing information Args: content: dict of data associated to a swh content object top_url: whether or not to include the content url in the enriched data query_string: optional query string of type ':' used when requesting the content, it acts as a hint for picking the same hash method when computing the url listed above + request: Absolute URIs will be generated if provided Returns: An enriched content dict filled with additional urls """ checksums = content if 'checksums' in content: checksums = content['checksums'] hash_algo = 'sha1' if query_string: hash_algo = parse_hash(query_string)[0] if hash_algo in checksums: q = '%s:%s' % (hash_algo, checksums[hash_algo]) if top_url: content['content_url'] = reverse( 'api-1-content', url_args={'q': q}) - content['data_url'] = reverse('api-1-content-raw', url_args={'q': q}) + content['data_url'] = reverse('api-1-content-raw', + url_args={'q': q}, + request=request) content['filetype_url'] = reverse( - 'api-1-content-filetype', url_args={'q': q}) + 'api-1-content-filetype', + url_args={'q': q}, + request=request) content['language_url'] = reverse( - 'api-1-content-language', url_args={'q': q}) + 
'api-1-content-language', + url_args={'q': q}, + request=request) content['license_url'] = reverse( - 'api-1-content-license', url_args={'q': q}) + 'api-1-content-license', + url_args={'q': q}, + request=request) return content -def enrich_revision(revision): +def enrich_revision(revision: Dict[str, Any], + request: Optional[HttpRequest] = None) -> Dict[str, Any]: """Enrich revision with links where it makes sense (directory, parents). Keep track of the navigation breadcrumbs if they are specified. Args: revision: the revision as a dict + request: Absolute URIs will be generated if provided + + Returns: + An enriched revision dict filled with additional urls """ revision['url'] = reverse('api-1-revision', - url_args={'sha1_git': revision['id']}) + url_args={'sha1_git': revision['id']}, + request=request) revision['history_url'] = reverse('api-1-revision-log', - url_args={'sha1_git': revision['id']}) + url_args={'sha1_git': revision['id']}, + request=request) if 'directory' in revision: revision['directory_url'] = reverse( - 'api-1-directory', url_args={'sha1_git': revision['directory']}) + 'api-1-directory', + url_args={'sha1_git': revision['directory']}, + request=request) if 'parents' in revision: parents = [] for parent in revision['parents']: parents.append({ 'id': parent, - 'url': reverse('api-1-revision', url_args={'sha1_git': parent}) + 'url': reverse('api-1-revision', + url_args={'sha1_git': parent}, + request=request) }) revision['parents'] = parents if 'children' in revision: children = [] for child in revision['children']: children.append(reverse( - 'api-1-revision', url_args={'sha1_git': child})) + 'api-1-revision', + url_args={'sha1_git': child}, + request=request)) revision['children_urls'] = children if 'message_decoding_failed' in revision: - revision['message_url'] = \ - reverse('api-1-revision-raw-message', - url_args={'sha1_git': revision['id']}) + revision['message_url'] = reverse( + 'api-1-revision-raw-message', + url_args={'sha1_git': 
revision['id']}, + request=request) return revision + + +def enrich_snapshot(snapshot: Dict[str, Any], + request: Optional[HttpRequest] = None) -> Dict[str, Any]: + """Enrich snapshot with links to the branch targets + + Args: + snapshot: the snapshot as a dict + request: Absolute URIs will be generated if provided + + Returns: + An enriched snapshot dict filled with additional urls + """ + if 'branches' in snapshot: + snapshot['branches'] = { + k: enrich_object(v, request) if v else None + for k, v in snapshot['branches'].items() + } + for k, v in snapshot['branches'].items(): + if v and v['target_type'] == 'alias': + branch = resolve_branch_alias(snapshot, v) + if branch: + branch = enrich_object(branch, request) + v['target_url'] = branch['target_url'] + return snapshot + + +def enrich_origin(origin: Dict[str, Any], + request: Optional[HttpRequest] = None) -> Dict[str, Any]: + """Enrich origin dict with link to its visits + + Args: + origin: the origin as a dict + request: Absolute URIs will be generated if provided + + Returns: + An enriched origin dict filled with an additional url + """ + if 'url' in origin: + origin['origin_visits_url'] = reverse( + 'api-1-origin-visits', + url_args={'origin_url': origin['url']}, + request=request) + + return origin + + +def enrich_origin_visit(origin_visit: Dict[str, Any], *, + with_origin_link: bool, with_origin_visit_link: bool, + request: Optional[HttpRequest] = None + ) -> Dict[str, Any]: + """Enrich origin visit dict with additional links + + Args: + origin_visit: the origin visit as a dict + with_origin_link: whether to add link to origin + with_origin_visit_link: whether to add link to origin visit + request: Absolute URIs will be generated if provided + + Returns: + An enriched origin visit dict filled with additional urls + """ + ov = origin_visit + if with_origin_link: + ov['origin_url'] = reverse('api-1-origin', + url_args={'origin_url': ov['origin']}, + request=request) + if with_origin_visit_link: + 
ov['origin_visit_url'] = reverse('api-1-origin-visit', + url_args={'origin_url': ov['origin'], + 'visit_id': ov['visit']}, + request=request) + snapshot = ov['snapshot'] + if snapshot: + ov['snapshot_url'] = reverse('api-1-snapshot', + url_args={'snapshot_id': snapshot}, + request=request) + else: + ov['snapshot_url'] = None + return ov diff --git a/swh/web/api/views/content.py b/swh/web/api/views/content.py index aca75ae2..5d956091 100644 --- a/swh/web/api/views/content.py +++ b/swh/web/api/views/content.py @@ -1,377 +1,384 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import functools from django.http import HttpResponse from swh.web.common import service from swh.web.common.utils import reverse from swh.web.common.exc import NotFoundExc from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api import utils from swh.web.api.apiurls import api_route from swh.web.api.views.utils import api_lookup @api_route(r'/content/(?P[0-9a-z_:]*[0-9a-f]+)/filetype/', 'api-1-content-filetype', checksum_args=['q']) @api_doc('/content/filetype/') @format_docstring() def api_content_filetype(request, q): """ .. http:get:: /api/1/content/[(hash_type):](hash)/filetype/ Get information about the detected MIME type of a content object. :param string hash_type: optional parameter specifying which hashing algorithm has been used to compute the content checksum. It can be either ``sha1``, ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not provided, it is assumed that the hashing algorithm used is `sha1`. :param string hash: hexadecimal representation of the checksum value computed with the specified hashing algorithm. 
:>json object content_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/` for getting information about the content :>json string encoding: the detected content encoding :>json string id: the **sha1** identifier of the content :>json string mimetype: the detected MIME type of the content :>json object tool: information about the tool used to detect the content filetype {common_headers} **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid **hash_type** or **hash** has been provided :statuscode 404: requested content can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/filetype/` """ # noqa return api_lookup( service.lookup_content_filetype, q, notfound_msg='No filetype information found for content {}.'.format(q), - enrich_fn=utils.enrich_metadata_endpoint) + enrich_fn=utils.enrich_metadata_endpoint, + request=request) @api_route(r'/content/(?P[0-9a-z_:]*[0-9a-f]+)/language/', 'api-1-content-language', checksum_args=['q']) @api_doc('/content/language/') @format_docstring() def api_content_language(request, q): """ .. http:get:: /api/1/content/[(hash_type):](hash)/language/ Get information about the programming language used in a content object. Note: this endpoint currently returns no data. :param string hash_type: optional parameter specifying which hashing algorithm has been used to compute the content checksum. It can be either ``sha1``, ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not provided, it is assumed that the hashing algorithm used is ``sha1``. :param string hash: hexadecimal representation of the checksum value computed with the specified hashing algorithm. 
:>json object content_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/` for getting information about the content :>json string id: the **sha1** identifier of the content :>json string lang: the detected programming language if any :>json object tool: information about the tool used to detect the programming language {common_headers} **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid **hash_type** or **hash** has been provided :statuscode 404: requested content can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/language/` """ # noqa return api_lookup( service.lookup_content_language, q, notfound_msg='No language information found for content {}.'.format(q), - enrich_fn=utils.enrich_metadata_endpoint) + enrich_fn=utils.enrich_metadata_endpoint, + request=request) @api_route(r'/content/(?P[0-9a-z_:]*[0-9a-f]+)/license/', 'api-1-content-license', checksum_args=['q']) @api_doc('/content/license/') @format_docstring() def api_content_license(request, q): """ .. http:get:: /api/1/content/[(hash_type):](hash)/license/ Get information about the license of a content object. :param string hash_type: optional parameter specifying which hashing algorithm has been used to compute the content checksum. It can be either ``sha1``, ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not provided, it is assumed that the hashing algorithm used is ``sha1``. :param string hash: hexadecimal representation of the checksum value computed with the specified hashing algorithm. 
:>json object content_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/` for getting information about the content :>json string id: the **sha1** identifier of the content :>json array licenses: array of strings containing the detected license names if any :>json object tool: information about the tool used to detect the license {common_headers} **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid **hash_type** or **hash** has been provided :statuscode 404: requested content can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/license/` """ # noqa return api_lookup( service.lookup_content_license, q, notfound_msg='No license information found for content {}.'.format(q), - enrich_fn=utils.enrich_metadata_endpoint) + enrich_fn=utils.enrich_metadata_endpoint, + request=request) @api_route(r'/content/(?P[0-9a-z_:]*[0-9a-f]+)/ctags/', 'api-1-content-ctags') @api_doc('/content/ctags/', tags=['hidden']) def api_content_ctags(request, q): """ Get information about all `Ctags `_-style symbols defined in a content object. """ return api_lookup( service.lookup_content_ctags, q, notfound_msg='No ctags symbol found for content {}.'.format(q), - enrich_fn=utils.enrich_metadata_endpoint) + enrich_fn=utils.enrich_metadata_endpoint, + request=request) @api_route(r'/content/(?P[0-9a-z_:]*[0-9a-f]+)/raw/', 'api-1-content-raw', checksum_args=['q']) @api_doc('/content/raw/', handle_response=True) def api_content_raw(request, q): """ .. http:get:: /api/1/content/[(hash_type):](hash)/raw/ Get the raw content of a content object (aka a "blob"), as a byte sequence. :param string hash_type: optional parameter specifying which hashing algorithm has been used to compute the content checksum. It can be either ``sha1``, ``sha1_git``, ``sha256`` or ``blake2s256``. 
If that parameter is not provided, it is assumed that the hashing algorithm used is ``sha1``. :param string hash: hexadecimal representation of the checksum value computed with the specified hashing algorithm. :query string filename: if provided, the downloaded content will get that filename :resheader Content-Type: application/octet-stream **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid **hash_type** or **hash** has been provided :statuscode 404: requested content can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/raw/` """ # noqa def generate(content): yield content['data'] content_raw = service.lookup_content_raw(q) if not content_raw: raise NotFoundExc('Content %s is not found.' % q) filename = request.query_params.get('filename') if not filename: filename = 'content_%s_raw' % q.replace(':', '_') response = HttpResponse(generate(content_raw), content_type='application/octet-stream') response['Content-disposition'] = 'attachment; filename=%s' % filename return response @api_route(r'/content/symbol/(?P.+)/', 'api-1-content-symbol') @api_doc('/content/symbol/', tags=['hidden']) def api_content_symbol(request, q=None): """Search content objects by `Ctags `_-style symbol (e.g., function name, data type, method, ...). 
""" result = {} last_sha1 = request.query_params.get('last_sha1', None) per_page = int(request.query_params.get('per_page', '10')) def lookup_exp(exp, last_sha1=last_sha1, per_page=per_page): exp = list(service.lookup_expression(exp, last_sha1, per_page)) return exp if exp else None symbols = api_lookup( lookup_exp, q, notfound_msg="No indexed raw content match expression '{}'.".format(q), - enrich_fn=functools.partial(utils.enrich_content, top_url=True)) + enrich_fn=functools.partial(utils.enrich_content, top_url=True), + request=request) if symbols: nb_symbols = len(symbols) if nb_symbols == per_page: query_params = {} new_last_sha1 = symbols[-1]['sha1'] query_params['last_sha1'] = new_last_sha1 if request.query_params.get('per_page'): query_params['per_page'] = per_page result['headers'] = { 'link-next': reverse('api-1-content-symbol', url_args={'q': q}, - query_params=query_params) + query_params=query_params, + request=request) } result.update({ 'results': symbols }) return result @api_route(r'/content/known/search/', 'api-1-content-known', methods=['POST']) @api_route(r'/content/known/(?P(?!search).*)/', 'api-1-content-known') @api_doc('/content/known/', tags=['hidden']) @format_docstring() def api_check_content_known(request, q=None): """ .. http:get:: /api/1/content/known/(sha1)[,(sha1), ...,(sha1)]/ Check whether some content(s) (aka "blob(s)") is present in the archive based on its **sha1** checksum. :param string sha1: hexadecimal representation of the **sha1** checksum value for the content to check existence. Multiple values can be provided separated by ','. 
{common_headers} :>json array search_res: array holding the search result for each provided **sha1** :>json object search_stats: some statistics regarding the number of **sha1** provided and the percentage of those found in the archive **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid **sha1** has been provided **Example:** .. parsed-literal:: :swh_web_api:`content/known/dc2830a9e72f23c1dfebef4413003221baa5fb62,0c3f19cb47ebfbe643fb19fa94c874d18fa62d12/` """ # noqa response = {'search_res': None, 'search_stats': None} search_stats = {'nbfiles': 0, 'pct': 0} search_res = None queries = [] # GET: Many hash separated values request if q: hashes = q.split(',') for v in hashes: queries.append({'filename': None, 'sha1': v}) # POST: Many hash requests in post form submission elif request.method == 'POST': data = request.data # Remove potential inputs with no associated value for k, v in data.items(): if v is not None: if k == 'q' and len(v) > 0: queries.append({'filename': None, 'sha1': v}) elif v != '': queries.append({'filename': k, 'sha1': v}) if queries: lookup = service.lookup_multiple_hashes(queries) result = [] nb_queries = len(queries) for el in lookup: res_d = {'sha1': el['sha1'], 'found': el['found']} if 'filename' in el and el['filename']: res_d['filename'] = el['filename'] result.append(res_d) search_res = result nbfound = len([x for x in lookup if x['found']]) search_stats['nbfiles'] = nb_queries search_stats['pct'] = (nbfound / nb_queries) * 100 response['search_res'] = search_res response['search_stats'] = search_stats return response @api_route(r'/content/(?P[0-9a-z_:]*[0-9a-f]+)/', 'api-1-content', checksum_args=['q']) @api_doc('/content/') @format_docstring() def api_content_metadata(request, q): """ .. http:get:: /api/1/content/[(hash_type):](hash)/ Get information about a content (aka a "blob") object. 
In the archive, a content object is identified based on checksum values computed using various hashing algorithms. :param string hash_type: optional parameter specifying which hashing algorithm has been used to compute the content checksum. It can be either ``sha1``, ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not provided, it is assumed that the hashing algorithm used is ``sha1``. :param string hash: hexadecimal representation of the checksum value computed with the specified hashing algorithm. {common_headers} :>json object checksums: object holding the computed checksum values for the requested content :>json string data_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/raw/` for downloading the content raw bytes :>json string filetype_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/filetype/` for getting information about the content MIME type :>json string language_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/language/` for getting information about the programming language used in the content :>json number length: length of the content in bytes :>json string license_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/license/` for getting information about the license of the content **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid **hash_type** or **hash** has been provided :statuscode 404: requested content can not be found in the archive **Example:** .. 
parsed-literal:: curl -i :swh_web_api:`content/sha1_git:fe95a46679d128ff167b7c55df5d02356c5a1ae1/` """ # noqa return api_lookup( service.lookup_content, q, notfound_msg='Content with {} not found.'.format(q), - enrich_fn=functools.partial(utils.enrich_content, query_string=q)) + enrich_fn=functools.partial(utils.enrich_content, query_string=q), + request=request) diff --git a/swh/web/api/views/directory.py b/swh/web/api/views/directory.py index 26313a39..14e87981 100644 --- a/swh/web/api/views/directory.py +++ b/swh/web/api/views/directory.py @@ -1,76 +1,78 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.web.common import service from swh.web.api import utils from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route from swh.web.api.views.utils import api_lookup @api_route(r'/directory/(?P[0-9a-f]+)/', 'api-1-directory', checksum_args=['sha1_git']) @api_route(r'/directory/(?P[0-9a-f]+)/(?P.+)/', 'api-1-directory', checksum_args=['sha1_git']) @api_doc('/directory/') @format_docstring() def api_directory(request, sha1_git, path=None): """ .. http:get:: /api/1/directory/(sha1_git)/[(path)/] Get information about directory objects. Directories are identified by **sha1** checksums, compatible with Git directory identifiers. See :func:`swh.model.identifiers.directory_identifier` in our data model module for details about how they are computed. When given only a directory identifier, this endpoint returns information about the directory itself, returning its content (usually a list of directory entries). When given a directory identifier and a path, this endpoint returns information about the directory entry pointed by the relative path, starting path resolution from the given directory. 
:param string sha1_git: hexadecimal representation of the directory **sha1_git** identifier :param string path: optional parameter to get information about the directory entry pointed by that relative path {common_headers} :>jsonarr object checksums: object holding the computed checksum values for a directory entry (only for file entries) :>jsonarr string dir_id: **sha1_git** identifier of the requested directory :>jsonarr number length: length of a directory entry in bytes (only for file entries) for getting information about the content MIME type :>jsonarr string name: the directory entry name :>jsonarr number perms: permissions for the directory entry :>jsonarr string target: **sha1_git** identifier of the directory entry :>jsonarr string target_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/` or :http:get:`/api/1/directory/(sha1_git)/[(path)/]` depending on the directory entry type :>jsonarr string type: the type of the directory entry, can be either ``dir``, ``file`` or ``rev`` **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid **hash_type** or **hash** has been provided :statuscode 404: requested directory can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`directory/977fc4b98c0e85816348cebd3b12026407c368b6/` """ # noqa if path: error_msg_path = ('Entry with path %s relative to directory ' 'with sha1_git %s not found.') % (path, sha1_git) return api_lookup( service.lookup_directory_with_path, sha1_git, path, notfound_msg=error_msg_path, - enrich_fn=utils.enrich_directory) + enrich_fn=utils.enrich_directory, + request=request) else: error_msg_nopath = 'Directory with sha1_git %s not found.' 
% sha1_git return api_lookup( service.lookup_directory, sha1_git, notfound_msg=error_msg_nopath, - enrich_fn=utils.enrich_directory) + enrich_fn=utils.enrich_directory, + request=request) diff --git a/swh/web/api/views/identifiers.py b/swh/web/api/views/identifiers.py index f93cae96..4cef36ec 100644 --- a/swh/web/api/views/identifiers.py +++ b/swh/web/api/views/identifiers.py @@ -1,62 +1,63 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.web.common import service from swh.web.common.utils import resolve_swh_persistent_id from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route @api_route(r'/resolve/(?P.*)/', 'api-1-resolve-swh-pid') @api_doc('/resolve/') @format_docstring() def api_resolve_swh_pid(request, swh_id): """ .. http:get:: /api/1/resolve/(swh_id)/ Resolve a Software Heritage persistent identifier. Try to resolve a provided `persistent identifier `_ into an url for browsing the pointed archive object. If the provided identifier is valid, the existence of the object in the archive will also be checked. 
:param string swh_id: a Software Heritage persistent identifier :>json string browse_url: the url for browsing the pointed object :>json object metadata: object holding optional parts of the persistent identifier :>json string namespace: the persistent identifier namespace :>json string object_id: the hash identifier of the pointed object :>json string object_type: the type of the pointed object :>json number scheme_version: the scheme version of the persistent identifier {common_headers} **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid persistent identifier has been provided :statuscode 404: the pointed object does not exist in the archive **Example:** .. parsed-literal:: :swh_web_api:`resolve/swh:1:rev:96db9023b881d7cd9f379b0c154650d6c108e9a3;origin=https://github.com/openssl/openssl/` """ # noqa # try to resolve the provided pid swh_id_resolved = resolve_swh_persistent_id(swh_id) # id is well-formed, now check that the pointed # object is present in the archive, NotFoundExc # will be raised otherwise swh_id_parsed = swh_id_resolved['swh_id_parsed'] object_type = swh_id_parsed.object_type object_id = swh_id_parsed.object_id service.lookup_object(object_type, object_id) # id is well-formed and the pointed object exists swh_id_data = swh_id_parsed._asdict() - swh_id_data['browse_url'] = swh_id_resolved['browse_url'] + swh_id_data['browse_url'] = request.build_absolute_uri( + swh_id_resolved['browse_url']) return swh_id_data diff --git a/swh/web/api/views/origin.py b/swh/web/api/views/origin.py index e2dcbf05..594a7943 100644 --- a/swh/web/api/views/origin.py +++ b/swh/web/api/views/origin.py @@ -1,502 +1,484 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from 
distutils.util import strtobool from functools import partial from swh.web.common import service from swh.web.common.exc import BadInputExc from swh.web.common.origin_visits import get_origin_visits from swh.web.common.utils import reverse from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route +from swh.web.api.utils import enrich_origin, enrich_origin_visit from swh.web.api.views.utils import api_lookup DOC_RETURN_ORIGIN = ''' :>json string origin_visits_url: link to in order to get information about the visits for that origin :>json string url: the origin canonical url :>json string type: the type of software origin (deprecated value; types are now associated to visits instead of origins) :>json number id: the origin unique identifier (deprecated value; you should only refer to origins based on their URL) ''' DOC_RETURN_ORIGIN_ARRAY = \ DOC_RETURN_ORIGIN.replace(':>json', ':>jsonarr') DOC_RETURN_ORIGIN_VISIT = ''' :>json string date: ISO representation of the visit date (in UTC) :>json str origin: the origin canonical url :>json string origin_url: link to get information about the origin :>jsonarr string snapshot: the snapshot identifier of the visit (may be null if status is not **full**). :>jsonarr string snapshot_url: link to :http:get:`/api/1/snapshot/(snapshot_id)/` in order to get information about the snapshot of the visit (may be null if status is not **full**). 
:>json string status: status of the visit (either **full**, **partial** or **ongoing**) :>json number visit: the unique identifier of the visit ''' DOC_RETURN_ORIGIN_VISIT_ARRAY = \ DOC_RETURN_ORIGIN_VISIT.replace(':>json', ':>jsonarr') DOC_RETURN_ORIGIN_VISIT_ARRAY += ''' :>jsonarr number id: the unique identifier of the origin :>jsonarr string origin_visit_url: link to :http:get:`/api/1/origin/(origin_url)/visit/(visit_id)/` in order to get information about the visit ''' -def _enrich_origin(origin): - if 'url' in origin: - o = origin.copy() - o['origin_visits_url'] = reverse( - 'api-1-origin-visits', url_args={'origin_url': origin['url']}) - return o - - return origin - - -def _enrich_origin_visit(origin_visit, *, - with_origin_link, with_origin_visit_link): - ov = origin_visit.copy() - if with_origin_link: - ov['origin_url'] = reverse('api-1-origin', - url_args={'origin_url': ov['origin']}) - if with_origin_visit_link: - ov['origin_visit_url'] = reverse('api-1-origin-visit', - url_args={'origin_url': ov['origin'], - 'visit_id': ov['visit']}) - snapshot = ov['snapshot'] - if snapshot: - ov['snapshot_url'] = reverse('api-1-snapshot', - url_args={'snapshot_id': snapshot}) - else: - ov['snapshot_url'] = None - return ov - - @api_route(r'/origins/', 'api-1-origins') @api_doc('/origins/', noargs=True) @format_docstring(return_origin_array=DOC_RETURN_ORIGIN_ARRAY) def api_origins(request): """ .. http:get:: /api/1/origins/ Get list of archived software origins. .. warning:: This endpoint used to provide an `origin_from` query parameter, and guarantee an order on results. This is no longer true, and only the Link header should be used for paginating through results. :query int origin_count: The maximum number of origins to return (default to 100, can not exceed 10000) {return_origin_array} {common_headers} {resheader_link} **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error **Example:** .. 
parsed-literal:: :swh_web_api:`origins?origin_count=500` """ origin_from = int(request.query_params.get('origin_from', '1')) origin_count = int(request.query_params.get('origin_count', '100')) origin_count = min(origin_count, 10000) results = api_lookup( service.lookup_origins, origin_from, origin_count+1, - enrich_fn=_enrich_origin) + enrich_fn=enrich_origin, + request=request) response = {'results': results, 'headers': {}} if len(results) > origin_count: origin_from = results.pop()['id'] response['headers']['link-next'] = reverse( 'api-1-origins', query_params={'origin_from': origin_from, - 'origin_count': origin_count}) + 'origin_count': origin_count}, + request=request) return response @api_route(r'/origin/(?P.+)/get/', 'api-1-origin') @api_doc('/origin/') @format_docstring(return_origin=DOC_RETURN_ORIGIN) def api_origin(request, origin_url): """ .. http:get:: /api/1/origin/(origin_url)/get/ Get information about a software origin. :param string origin_url: the origin url {return_origin} {common_headers} **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 404: requested origin can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`origin/https://github.com/python/cpython/get/` """ ori_dict = { 'url': origin_url } error_msg = 'Origin with url %s not found.' % ori_dict['url'] return api_lookup( service.lookup_origin, ori_dict, notfound_msg=error_msg, - enrich_fn=_enrich_origin) + enrich_fn=enrich_origin, + request=request) @api_route(r'/origin/search/(?P.+)/', 'api-1-origin-search', throttle_scope='swh_api_origin_search') @api_doc('/origin/search/') @format_docstring(return_origin_array=DOC_RETURN_ORIGIN_ARRAY) def api_origin_search(request, url_pattern): """ .. http:get:: /api/1/origin/search/(url_pattern)/ Search for software origins whose urls contain a provided string pattern or match a provided regular expression. 
The search is performed in a case insensitive way. .. warning:: This endpoint used to provide an `offset` query parameter, and guarantee an order on results. This is no longer true, and only the Link header should be used for paginating through results. :param string url_pattern: a string pattern :query int limit: the maximum number of found origins to return (bounded to 1000) :query boolean with_visit: if true, only return origins with at least one visit by Software heritage {return_origin_array} {common_headers} {resheader_link} **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error **Example:** .. parsed-literal:: :swh_web_api:`origin/search/python/?limit=2` """ result = {} limit = min(int(request.query_params.get('limit', '70')), 1000) page_token = request.query_params.get('page_token') with_visit = request.query_params.get('with_visit', 'false') (results, page_token) = api_lookup( service.search_origin, url_pattern, limit, bool(strtobool(with_visit)), page_token, - enrich_fn=_enrich_origin) + enrich_fn=enrich_origin, request=request) if page_token is not None: query_params = {} query_params['limit'] = limit query_params['page_token'] = page_token result['headers'] = { 'link-next': reverse('api-1-origin-search', url_args={'url_pattern': url_pattern}, - query_params=query_params) + query_params=query_params, + request=request) } result.update({ 'results': results }) return result @api_route(r'/origin/metadata-search/', 'api-1-origin-metadata-search') @api_doc('/origin/metadata-search/', noargs=True, need_params=True) @format_docstring(return_origin_array=DOC_RETURN_ORIGIN_ARRAY) def api_origin_metadata_search(request): """ .. http:get:: /api/1/origin/metadata-search/ Search for software origins whose metadata (expressed as a JSON-LD/CodeMeta dictionary) match the provided criteria. For now, only full-text search on this dictionary is supported. 
:query str fulltext: a string that will be matched against origin metadata; results are ranked and ordered starting with the best ones. :query int limit: the maximum number of found origins to return (bounded to 100) {return_origin_array} {common_headers} **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error **Example:** .. parsed-literal:: :swh_web_api:`origin/metadata-search/?limit=2&fulltext=Jane%20Doe` """ fulltext = request.query_params.get('fulltext', None) limit = min(int(request.query_params.get('limit', '70')), 100) if not fulltext: content = '"fulltext" must be provided and non-empty.' raise BadInputExc(content) - results = api_lookup(service.search_origin_metadata, fulltext, limit) + results = api_lookup(service.search_origin_metadata, fulltext, limit, + request=request) return { 'results': results, } @api_route(r'/origin/(?P.*)/visits/', 'api-1-origin-visits') @api_doc('/origin/visits/') @format_docstring( return_origin_visit_array=DOC_RETURN_ORIGIN_VISIT_ARRAY) def api_origin_visits(request, origin_url): """ .. http:get:: /api/1/origin/(origin_url)/visits/ Get information about all visits of a software origin. Visits are returned sorted in descending order according to their date. :param str origin_url: a software origin URL :query int per_page: specify the number of visits to list, for pagination purposes :query int last_visit: visit to start listing from, for pagination purposes {common_headers} {resheader_link} {return_origin_visit_array} **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 404: requested origin can not be found in the archive **Example:** .. 
parsed-literal:: :swh_web_api:`origin/https://github.com/hylang/hy/visits/` """ result = {} origin_query = {'url': origin_url} notfound_msg = 'No origin {} found'.format(origin_url) url_args_next = {'origin_url': origin_url} per_page = int(request.query_params.get('per_page', '10')) last_visit = request.query_params.get('last_visit') if last_visit: last_visit = int(last_visit) def _lookup_origin_visits( origin_query, last_visit=last_visit, per_page=per_page): all_visits = get_origin_visits(origin_query) all_visits.reverse() visits = [] if not last_visit: visits = all_visits[:per_page] else: for i, v in enumerate(all_visits): if v['visit'] == last_visit: visits = all_visits[i+1:i+1+per_page] break for v in visits: yield v results = api_lookup(_lookup_origin_visits, origin_query, notfound_msg=notfound_msg, - enrich_fn=partial(_enrich_origin_visit, + enrich_fn=partial(enrich_origin_visit, with_origin_link=False, - with_origin_visit_link=True)) + with_origin_visit_link=True), + request=request) if results: nb_results = len(results) if nb_results == per_page: new_last_visit = results[-1]['visit'] query_params = {} query_params['last_visit'] = new_last_visit if request.query_params.get('per_page'): query_params['per_page'] = per_page result['headers'] = { 'link-next': reverse('api-1-origin-visits', url_args=url_args_next, - query_params=query_params) + query_params=query_params, + request=request) } result.update({ 'results': results }) return result @api_route(r'/origin/(?P.*)/visit/latest/', 'api-1-origin-visit-latest', throttle_scope='swh_api_origin_visit_latest') @api_doc('/origin/visit/latest/') @format_docstring(return_origin_visit=DOC_RETURN_ORIGIN_VISIT) def api_origin_visit_latest(request, origin_url=None): """ .. http:get:: /api/1/origin/(origin_url)/visit/latest/ Get information about the latest visit of a software origin. 
:param str origin_url: a software origin URL :query boolean require_snapshot: if true, only return a visit with a snapshot {common_headers} {return_origin_visit} **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 404: requested origin or visit can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`origin/https://github.com/hylang/hy/visit/latest/` """ require_snapshot = request.query_params.get('require_snapshot', 'false') return api_lookup( service.lookup_origin_visit_latest, origin_url, bool(strtobool(require_snapshot)), notfound_msg=('No visit for origin {} found' .format(origin_url)), - enrich_fn=partial(_enrich_origin_visit, + enrich_fn=partial(enrich_origin_visit, with_origin_link=True, - with_origin_visit_link=False)) + with_origin_visit_link=False), + request=request) @api_route(r'/origin/(?P.*)/visit/(?P[0-9]+)/', 'api-1-origin-visit') @api_doc('/origin/visit/') @format_docstring(return_origin_visit=DOC_RETURN_ORIGIN_VISIT) def api_origin_visit(request, visit_id, origin_url): """ .. http:get:: /api/1/origin/(origin_url)/visit/(visit_id)/ Get information about a specific visit of a software origin. :param str origin_url: a software origin URL :param int visit_id: a visit identifier {common_headers} {return_origin_visit} **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 404: requested origin or visit can not be found in the archive **Example:** .. 
parsed-literal:: :swh_web_api:`origin/https://github.com/hylang/hy/visit/1/` """ return api_lookup( service.lookup_origin_visit, origin_url, int(visit_id), notfound_msg=('No visit {} for origin {} found' .format(visit_id, origin_url)), - enrich_fn=partial(_enrich_origin_visit, + enrich_fn=partial(enrich_origin_visit, with_origin_link=True, - with_origin_visit_link=False)) + with_origin_visit_link=False), + request=request) @api_route(r'/origin/(?P.+)' '/intrinsic-metadata', 'api-origin-intrinsic-metadata') @api_doc('/origin/intrinsic-metadata/') @format_docstring() def api_origin_intrinsic_metadata(request, origin_url): """ .. http:get:: /api/1/origin/(origin_url)/intrinsic-metadata Get intrinsic metadata of a software origin (as a JSON-LD/CodeMeta dictionary). :param string origin_url: the origin url :>json string ???: intrinsic metadata field of the origin {common_headers} **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 404: requested origin can not be found in the archive **Example:** .. 
parsed-literal:: :swh_web_api:`origin/https://github.com/python/cpython/intrinsic-metadata` """ # noqa ori_dict = { 'url': origin_url } error_msg = 'Origin with url %s not found' % ori_dict['url'] return api_lookup( service.lookup_origin_intrinsic_metadata, ori_dict, notfound_msg=error_msg, - enrich_fn=_enrich_origin) + enrich_fn=enrich_origin, + request=request) diff --git a/swh/web/api/views/release.py b/swh/web/api/views/release.py index ca765fb7..80b0a1f1 100644 --- a/swh/web/api/views/release.py +++ b/swh/web/api/views/release.py @@ -1,59 +1,60 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.web.common import service from swh.web.api import utils from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route from swh.web.api.views.utils import api_lookup @api_route(r'/release/(?P[0-9a-f]+)/', 'api-1-release', checksum_args=['sha1_git']) @api_doc('/release/') @format_docstring() def api_release(request, sha1_git): """ .. http:get:: /api/1/release/(sha1_git)/ Get information about a release in the archive. Releases are identified by **sha1** checksums, compatible with Git tag identifiers. See :func:`swh.model.identifiers.release_identifier` in our data model module for details about how they are computed. 
:param string sha1_git: hexadecimal representation of the release **sha1_git** identifier {common_headers} :>json object author: information about the author of the release :>json string date: ISO representation of the release date (in UTC) :>json string id: the release unique identifier :>json string message: the message associated to the release :>json string name: the name of the release :>json string target: the target identifier of the release :>json string target_type: the type of the target, can be either **release**, **revision**, **content**, **directory** :>json string target_url: a link to the adequate api url based on the target type **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid **sha1_git** value has been provided :statuscode 404: requested release can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`release/208f61cc7a5dbc9879ae6e5c2f95891e270f09ef/` """ error_msg = 'Release with sha1_git %s not found.' 
% sha1_git return api_lookup( service.lookup_release, sha1_git, notfound_msg=error_msg, - enrich_fn=utils.enrich_release) + enrich_fn=utils.enrich_release, + request=request) diff --git a/swh/web/api/views/revision.py b/swh/web/api/views/revision.py index e43d1d69..363a3e66 100644 --- a/swh/web/api/views/revision.py +++ b/swh/web/api/views/revision.py @@ -1,256 +1,260 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.http import HttpResponse from swh.web.common import service from swh.web.common.utils import reverse from swh.web.api import utils from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route from swh.web.api.views.utils import api_lookup DOC_RETURN_REVISION = ''' :>json object author: information about the author of the revision :>json object committer: information about the committer of the revision :>json string committer_date: ISO representation of the commit date (in UTC) :>json string date: ISO representation of the revision date (in UTC) :>json string directory: the unique identifier that revision points to :>json string directory_url: link to :http:get:`/api/1/directory/(sha1_git)/[(path)/]` to get information about the directory associated to the revision :>json string id: the revision unique identifier :>json boolean merge: whether or not the revision corresponds to a merge commit :>json string message: the message associated to the revision :>json array parents: the parents of the revision, i.e. 
the previous revisions that head directly to it, each entry of that array contains an unique parent revision identifier but also a link to :http:get:`/api/1/revision/(sha1_git)/` to get more information about it :>json string type: the type of the revision ''' # noqa DOC_RETURN_REVISION_ARRAY = \ DOC_RETURN_REVISION.replace(':>json', ':>jsonarr') def _revision_directory_by(revision, path, request_path, limit=100, with_data=False): """ Compute the revision matching criterion's directory or content data. Args: revision: dictionary of criterions representing a revision to lookup path: directory's path to lookup request_path: request path which holds the original context to limit: optional query parameter to limit the revisions log (default to 100). For now, note that this limit could impede the transitivity conclusion about sha1_git not being an ancestor of with_data: indicate to retrieve the content's raw data if path resolves to a content. """ def enrich_directory_local(dir, context_url=request_path): return utils.enrich_directory(dir, context_url) rev_id, result = service.lookup_directory_through_revision( revision, path, limit=limit, with_data=with_data) content = result['content'] if result['type'] == 'dir': # dir_entries result['content'] = list(map(enrich_directory_local, content)) elif result['type'] == 'file': # content result['content'] = utils.enrich_content(content) elif result['type'] == 'rev': # revision result['content'] = utils.enrich_revision(content) return result @api_route(r'/revision/(?P[0-9a-f]+)/', 'api-1-revision', checksum_args=['sha1_git']) @api_doc('/revision/') @format_docstring(return_revision=DOC_RETURN_REVISION) def api_revision(request, sha1_git): """ .. http:get:: /api/1/revision/(sha1_git)/ Get information about a revision in the archive. Revisions are identified by **sha1** checksums, compatible with Git commit identifiers. 
See :func:`swh.model.identifiers.revision_identifier` in our data model module for details about how they are computed. :param string sha1_git: hexadecimal representation of the revision **sha1_git** identifier {common_headers} {return_revision} **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid **sha1_git** value has been provided :statuscode 404: requested revision can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`revision/aafb16d69fd30ff58afdd69036a26047f3aebdc6/` """ # noqa return api_lookup( service.lookup_revision, sha1_git, notfound_msg='Revision with sha1_git {} not found.'.format(sha1_git), - enrich_fn=utils.enrich_revision) + enrich_fn=utils.enrich_revision, + request=request) @api_route(r'/revision/(?P[0-9a-f]+)/raw/', 'api-1-revision-raw-message', checksum_args=['sha1_git']) @api_doc('/revision/raw/', tags=['hidden'], handle_response=True) def api_revision_raw_message(request, sha1_git): """Return the raw data of the message of revision identified by sha1_git """ raw = service.lookup_revision_message(sha1_git) response = HttpResponse(raw['message'], content_type='application/octet-stream') response['Content-disposition'] = \ 'attachment;filename=rev_%s_raw' % sha1_git return response @api_route(r'/revision/(?P[0-9a-f]+)/directory/', 'api-1-revision-directory', checksum_args=['sha1_git']) @api_route(r'/revision/(?P[0-9a-f]+)/directory/(?P.+)/', 'api-1-revision-directory', checksum_args=['sha1_git']) @api_doc('/revision/directory/') @format_docstring() def api_revision_directory(request, sha1_git, dir_path=None, with_data=False): """ .. http:get:: /api/1/revision/(sha1_git)/directory/[(path)/] Get information about directory (entry) objects associated to revisions. Each revision is associated to a single "root" directory. 
This endpoint behaves like :http:get:`/api/1/directory/(sha1_git)/[(path)/]`, but operates on the root directory associated to a given revision. :param string sha1_git: hexadecimal representation of the revision **sha1_git** identifier :param string path: optional parameter to get information about the directory entry pointed by that relative path {common_headers} :>json array content: directory entries as returned by :http:get:`/api/1/directory/(sha1_git)/[(path)/]` :>json string path: path of directory from the revision root one :>json string revision: the unique revision identifier :>json string type: the type of the directory **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid **sha1_git** value has been provided :statuscode 404: requested revision can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`revision/f1b94134a4b879bc55c3dacdb496690c8ebdc03f/directory/` """ # noqa return _revision_directory_by({'sha1_git': sha1_git}, dir_path, request.path, with_data=with_data) @api_route(r'/revision/(?P[0-9a-f]+)/log/', 'api-1-revision-log', checksum_args=['sha1_git']) @api_route(r'/revision/(?P[0-9a-f]+)' r'/prev/(?P[0-9a-f]*/*)/log/', 'api-1-revision-log', checksum_args=['sha1_git', 'prev_sha1s']) @api_doc('/revision/log/') @format_docstring(return_revision_array=DOC_RETURN_REVISION_ARRAY) def api_revision_log(request, sha1_git, prev_sha1s=None): """ .. http:get:: /api/1/revision/(sha1_git)[/prev/(prev_sha1s)]/log/ Get a list of all revisions heading to a given one, in other words show the commit log. :param string sha1_git: hexadecimal representation of the revision **sha1_git** identifier :param string prev_sha1s: optional parameter representing the navigation breadcrumbs (descendant revisions previously visited). If multiple values, use / as delimiter. If provided, revisions information will be added at the beginning of the returned list. 
:query int per_page: number of elements in the returned list, for pagination purpose {common_headers} {resheader_link} {return_revision_array} **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid **sha1_git** value has been provided :statuscode 404: requested revision can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`revision/e1a315fa3fa734e2a6154ed7b5b9ae0eb8987aad/log/` """ # noqa result = {} per_page = int(request.query_params.get('per_page', '10')) def lookup_revision_log_with_limit(s, limit=per_page+1): return service.lookup_revision_log(s, limit) error_msg = 'Revision with sha1_git %s not found.' % sha1_git rev_get = api_lookup(lookup_revision_log_with_limit, sha1_git, notfound_msg=error_msg, - enrich_fn=utils.enrich_revision) + enrich_fn=utils.enrich_revision, + request=request) nb_rev = len(rev_get) if nb_rev == per_page+1: rev_backward = rev_get[:-1] new_last_sha1 = rev_get[-1]['id'] query_params = {} if request.query_params.get('per_page'): query_params['per_page'] = per_page result['headers'] = { 'link-next': reverse('api-1-revision-log', url_args={'sha1_git': new_last_sha1}, - query_params=query_params) + query_params=query_params, + request=request) } else: rev_backward = rev_get if not prev_sha1s: # no nav breadcrumbs, so we're done revisions = rev_backward else: rev_forward_ids = prev_sha1s.split('/') rev_forward = api_lookup( service.lookup_revision_multiple, rev_forward_ids, notfound_msg=error_msg, - enrich_fn=utils.enrich_revision) + enrich_fn=utils.enrich_revision, + request=request) revisions = rev_forward + rev_backward result.update({ 'results': revisions }) return result diff --git a/swh/web/api/views/snapshot.py b/swh/web/api/views/snapshot.py index d4b7b170..6027846b 100644 --- a/swh/web/api/views/snapshot.py +++ b/swh/web/api/views/snapshot.py @@ -1,124 +1,97 @@ # Copyright (C) 2018-2019 The Software Heritage developers # 
See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.web.common import service from swh.web.common.utils import reverse from swh.web.config import get_config from swh.web.api.apidoc import api_doc, format_docstring -from swh.web.api import utils from swh.web.api.apiurls import api_route +from swh.web.api.utils import enrich_snapshot from swh.web.api.views.utils import api_lookup @api_route(r'/snapshot/(?P[0-9a-f]+)/', 'api-1-snapshot', checksum_args=['snapshot_id']) @api_doc('/snapshot/') @format_docstring() def api_snapshot(request, snapshot_id): """ .. http:get:: /api/1/snapshot/(snapshot_id)/ Get information about a snapshot in the archive. A snapshot is a set of named branches, which are pointers to objects at any level of the Software Heritage DAG. It represents a full picture of an origin at a given time. As well as pointing to other objects in the Software Heritage DAG, branches can also be aliases, in which case their target is the name of another branch in the same snapshot, or dangling, in which case the target is unknown. A snapshot identifier is a salted sha1. See :func:`swh.model.identifiers.snapshot_identifier` in our data model module for details about how they are computed. 
:param sha1 snapshot_id: a snapshot identifier :query str branches_from: optional parameter used to skip branches whose name is lesser than it before returning them :query int branches_count: optional parameter used to restrain the amount of returned branches (default to 1000) :query str target_types: optional comma separated list parameter used to filter the target types of branch to return (possible values that can be contained in that list are ``content``, ``directory``, ``revision``, ``release``, ``snapshot`` or ``alias``) {common_headers} {resheader_link} :>json object branches: object containing all branches associated to the snapshot,for each of them the associated target type and id are given but also a link to get information about that target :>json string id: the unique identifier of the snapshot **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid snapshot identifier has been provided :statuscode 404: requested snapshot can not be found in the archive **Example:** .. 
parsed-literal:: :swh_web_api:`snapshot/6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a/` """ - def _resolve_alias(snapshot, branch): - while branch and branch['target_type'] == 'alias': - if branch['target'] in snapshot['branches']: - branch = snapshot['branches'][branch['target']] - else: - snp = service.lookup_snapshot( - snapshot['id'], branches_from=branch['target'], - branches_count=1) - if snp and branch['target'] in snp['branches']: - branch = snp['branches'][branch['target']] - else: - branch = None - return branch - - def _enrich_snapshot(snapshot): - s = snapshot.copy() - if 'branches' in s: - s['branches'] = { - k: utils.enrich_object(v) if v else None - for k, v in s['branches'].items() - } - for k, v in s['branches'].items(): - if v and v['target_type'] == 'alias': - branch = _resolve_alias(snapshot, v) - if branch: - branch = utils.enrich_object(branch) - v['target_url'] = branch['target_url'] - return s - snapshot_content_max_size = get_config()['snapshot_content_max_size'] branches_from = request.GET.get('branches_from', '') branches_count = int(request.GET.get('branches_count', snapshot_content_max_size)) target_types = request.GET.get('target_types', None) target_types = target_types.split(',') if target_types else None results = api_lookup( service.lookup_snapshot, snapshot_id, branches_from, branches_count, target_types, notfound_msg='Snapshot with id {} not found.'.format(snapshot_id), - enrich_fn=_enrich_snapshot) + enrich_fn=enrich_snapshot, + request=request) response = {'results': results, 'headers': {}} if results['next_branch'] is not None: - response['headers']['link-next'] = \ - reverse('api-1-snapshot', - url_args={'snapshot_id': snapshot_id}, - query_params={'branches_from': results['next_branch'], - 'branches_count': branches_count, - 'target_types': target_types}) + response['headers']['link-next'] = reverse( + 'api-1-snapshot', + url_args={'snapshot_id': snapshot_id}, + query_params={'branches_from': results['next_branch'], + 
'branches_count': branches_count, + 'target_types': target_types}, + request=request) return response diff --git a/swh/web/api/views/utils.py b/swh/web/api/views/utils.py index 0f117f0d..5ade36c1 100644 --- a/swh/web/api/views/utils.py +++ b/swh/web/api/views/utils.py @@ -1,73 +1,86 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information +from types import GeneratorType +from typing import Callable, Any, Optional, Mapping, Dict +from typing_extensions import Protocol + +from django.http import HttpRequest + from rest_framework.response import Response from rest_framework.decorators import api_view -from types import GeneratorType - from swh.web.common.exc import NotFoundExc from swh.web.api.apiurls import APIUrls, api_route -def api_lookup(lookup_fn, *args, - notfound_msg='Object not found', - enrich_fn=None): +class EnrichFunction(Protocol): + def __call__(self, input: Mapping[str, str], + request: Optional[HttpRequest]) -> Dict[str, str]: ... + + +def api_lookup(lookup_fn: Callable[..., Any], *args: Any, + notfound_msg: Optional[str] = 'Object not found', + enrich_fn: Optional[EnrichFunction] = None, + request: Optional[HttpRequest] = None): r""" Capture a redundant behavior of: - looking up the backend with a criteria (be it an identifier or checksum) passed to the function lookup_fn - if nothing is found, raise an NotFoundExc exception with error message notfound_msg. - Otherwise if something is returned: - either as list, map or generator, map the enrich_fn function to it and return the resulting data structure as list. - either as dict and pass to enrich_fn and return the dict enriched. Args: - lookup_fn: function expects one criteria and optional supplementary \*args. + - \*args: supplementary arguments to pass to lookup_fn. 
- notfound_msg: if nothing matching the criteria is found, raise NotFoundExc with this error message. - enrich_fn: Function to use to enrich the result returned by lookup_fn. Default to the identity function if not provided. - - \*args: supplementary arguments to pass to lookup_fn. + - request: Input HTTP request that will be provided as parameter + to enrich_fn. + Raises: NotFoundExc or whatever `lookup_fn` raises. """ if enrich_fn is None: - enrich_fn = (lambda x: x) + enrich_fn = (lambda x, request: x) res = lookup_fn(*args) if res is None: raise NotFoundExc(notfound_msg) - if isinstance(res, (map, list, GeneratorType)): - return [enrich_fn(x) for x in res] - return enrich_fn(res) + if isinstance(res, (list, GeneratorType)) or type(res) == map: + return [enrich_fn(x, request=request) for x in res] + return enrich_fn(res, request=request) @api_view(['GET', 'HEAD']) def api_home(request): return Response({}, template_name='api/api.html') APIUrls.add_url_pattern(r'^$', api_home, view_name='api-1-homepage') @api_route(r'/', 'api-1-endpoints') def api_endpoints(request): """Display the list of opened api endpoints.
""" routes = APIUrls.get_app_endpoints().copy() for route, doc in routes.items(): doc['doc_intro'] = doc['docstring'].split('\n\n')[0] # Return a list of routes with consistent ordering env = { 'doc_routes': sorted(routes.items()) } return Response(env, template_name="api/endpoints.html") diff --git a/swh/web/api/views/vault.py b/swh/web/api/views/vault.py index 699526c1..6e33ac92 100644 --- a/swh/web/api/views/vault.py +++ b/swh/web/api/views/vault.py @@ -1,248 +1,252 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.http import HttpResponse from django.shortcuts import redirect from django.views.decorators.cache import never_cache from swh.model import hashutil from swh.web.common import service, query from swh.web.common.utils import reverse from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route from swh.web.api.views.utils import api_lookup # XXX: a bit spaghetti. Would be better with class-based views. def _dispatch_cook_progress(request, obj_type, obj_id): hex_id = hashutil.hash_to_hex(obj_id) object_name = obj_type.split('_')[0].title() if request.method == 'GET': return api_lookup( service.vault_progress, obj_type, obj_id, notfound_msg=("{} '{}' was never requested." - .format(object_name, hex_id))) + .format(object_name, hex_id)), + request=request) elif request.method == 'POST': email = request.POST.get('email', request.GET.get('email', None)) return api_lookup( service.vault_cook, obj_type, obj_id, email, notfound_msg=("{} '{}' not found." 
- .format(object_name, hex_id))) + .format(object_name, hex_id)), + request=request) @api_route(r'/vault/directory/(?P[0-9a-f]+)/', 'api-1-vault-cook-directory', methods=['GET', 'POST'], checksum_args=['dir_id'], throttle_scope='swh_vault_cooking') @never_cache @api_doc('/vault/directory/') @format_docstring() def api_vault_cook_directory(request, dir_id): """ .. http:get:: /api/1/vault/directory/(dir_id)/ .. http:post:: /api/1/vault/directory/(dir_id)/ Request the cooking of an archive for a directory or check its cooking status. That endpoint enables to create a vault cooking task for a directory through a POST request or check the status of a previously created one through a GET request. Once the cooking task has been executed, the resulting archive can be downloaded using the dedicated endpoint :http:get:`/api/1/vault/directory/(dir_id)/raw/`. Then to extract the cooked directory in the current one, use:: $ tar xvf path/to/directory.tar.gz :param string dir_id: the directory's sha1 identifier :query string email: e-mail to notify when the archive is ready {common_headers} :>json string fetch_url: the url from which to download the archive once it has been cooked (see :http:get:`/api/1/vault/directory/(dir_id)/raw/`) :>json string obj_type: the type of object to cook (directory or revision) :>json string progress_message: message describing the cooking task progress :>json number id: the cooking task id :>json string status: the cooking task status (either **new**, **pending**, **done** or **failed**) :>json string obj_id: the identifier of the object to cook **Allowed HTTP Methods:** :http:method:`get`, :http:method:`post`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid directory identifier has been provided :statuscode 404: requested directory did not receive any cooking request yet (in case of GET) or can not be found in the archive (in case of POST) """ _, obj_id = query.parse_hash_with_algorithms_or_throws( 
dir_id, ['sha1'], 'Only sha1_git is supported.') res = _dispatch_cook_progress(request, 'directory', obj_id) res['fetch_url'] = reverse('api-1-vault-fetch-directory', url_args={'dir_id': dir_id}) return res @api_route(r'/vault/directory/(?P[0-9a-f]+)/raw/', 'api-1-vault-fetch-directory', checksum_args=['dir_id']) @api_doc('/vault/directory/raw/', handle_response=True) def api_vault_fetch_directory(request, dir_id): """ .. http:get:: /api/1/vault/directory/(dir_id)/raw/ Fetch the cooked archive for a directory. See :http:get:`/api/1/vault/directory/(dir_id)/` to get more details on directory cooking. :param string dir_id: the directory's sha1 identifier :resheader Content-Type: application/octet-stream **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid directory identifier has been provided :statuscode 404: requested directory did not receive any cooking request yet (in case of GET) or can not be found in the archive (in case of POST) """ _, obj_id = query.parse_hash_with_algorithms_or_throws( dir_id, ['sha1'], 'Only sha1_git is supported.') res = api_lookup( service.vault_fetch, 'directory', obj_id, - notfound_msg="Directory with ID '{}' not found.".format(dir_id)) + notfound_msg="Directory with ID '{}' not found.".format(dir_id), + request=request) fname = '{}.tar.gz'.format(dir_id) response = HttpResponse(res, content_type='application/gzip') response['Content-disposition'] = 'attachment; filename={}'.format(fname) return response @api_route(r'/vault/revision/(?P[0-9a-f]+)/gitfast/', 'api-1-vault-cook-revision_gitfast', methods=['GET', 'POST'], checksum_args=['rev_id'], throttle_scope='swh_vault_cooking') @never_cache @api_doc('/vault/revision/gitfast/') @format_docstring() def api_vault_cook_revision_gitfast(request, rev_id): """ .. http:get:: /api/1/vault/revision/(rev_id)/gitfast/ .. 
http:post:: /api/1/vault/revision/(rev_id)/gitfast/ Request the cooking of a gitfast archive for a revision or check its cooking status. That endpoint enables to create a vault cooking task for a revision through a POST request or check the status of a previously created one through a GET request. Once the cooking task has been executed, the resulting gitfast archive can be downloaded using the dedicated endpoint :http:get:`/api/1/vault/revision/(rev_id)/gitfast/raw/`. Then to import the revision in the current directory, use:: $ git init $ zcat path/to/revision.gitfast.gz | git fast-import $ git checkout HEAD :param string rev_id: the revision's sha1 identifier :query string email: e-mail to notify when the gitfast archive is ready {common_headers} :>json string fetch_url: the url from which to download the archive once it has been cooked (see :http:get:`/api/1/vault/revision/(rev_id)/gitfast/raw/`) :>json string obj_type: the type of object to cook (directory or revision) :>json string progress_message: message describing the cooking task progress :>json number id: the cooking task id :>json string status: the cooking task status (new/pending/done/failed) :>json string obj_id: the identifier of the object to cook **Allowed HTTP Methods:** :http:method:`get`, :http:method:`post`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid revision identifier has been provided :statuscode 404: requested directory did not receive any cooking request yet (in case of GET) or can not be found in the archive (in case of POST) """ _, obj_id = query.parse_hash_with_algorithms_or_throws( rev_id, ['sha1'], 'Only sha1_git is supported.') res = _dispatch_cook_progress(request, 'revision_gitfast', obj_id) res['fetch_url'] = reverse('api-1-vault-fetch-revision_gitfast', url_args={'rev_id': rev_id}) return res @api_route(r'/vault/revision/(?P[0-9a-f]+)/gitfast/raw/', 'api-1-vault-fetch-revision_gitfast', checksum_args=['rev_id']) 
@api_doc('/vault/revision/gitfast/raw/', handle_response=True) def api_vault_fetch_revision_gitfast(request, rev_id): """ .. http:get:: /api/1/vault/revision/(rev_id)/gitfast/raw/ Fetch the cooked gitfast archive for a revision. See :http:get:`/api/1/vault/revision/(rev_id)/gitfast/` to get more details on directory cooking. :param string rev_id: the revision's sha1 identifier :resheader Content-Type: application/octet-stream **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid revision identifier has been provided :statuscode 404: requested directory did not receive any cooking request yet (in case of GET) or can not be found in the archive (in case of POST) """ _, obj_id = query.parse_hash_with_algorithms_or_throws( rev_id, ['sha1'], 'Only sha1_git is supported.') res = api_lookup( service.vault_fetch, 'revision_gitfast', obj_id, - notfound_msg="Revision with ID '{}' not found.".format(rev_id)) + notfound_msg="Revision with ID '{}' not found.".format(rev_id), + request=request) fname = '{}.gitfast.gz'.format(rev_id) response = HttpResponse(res, content_type='application/gzip') response['Content-disposition'] = 'attachment; filename={}'.format(fname) return response @api_route(r'/vault/revision_gitfast/(?P[0-9a-f]+)/raw/', 'api-1-vault-revision_gitfast-raw', checksum_args=['rev_id']) @api_doc('/vault/revision_gitfast/raw/', tags=['hidden'], handle_response=True) def _api_vault_revision_gitfast_raw(request, rev_id): """ The vault backend sends an email containing an invalid url to fetch a gitfast archive. So setup a redirection to the correct one as a temporary workaround. 
""" rev_gitfast_raw_url = reverse('api-1-vault-fetch-revision_gitfast', url_args={'rev_id': rev_id}) return redirect(rev_gitfast_raw_url) diff --git a/swh/web/common/swh_templatetags.py b/swh/web/common/swh_templatetags.py index 8f7261d0..81fdb4e6 100644 --- a/swh/web/common/swh_templatetags.py +++ b/swh/web/common/swh_templatetags.py @@ -1,187 +1,183 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from inspect import cleandoc import json import re from django import template from django.core.serializers.json import DjangoJSONEncoder from django.utils.safestring import mark_safe from docutils.core import publish_parts from docutils.writers.html4css1 import Writer, HTMLTranslator import sentry_sdk from swh.web.common.origin_save import get_savable_visit_types register = template.Library() class NoHeaderHTMLTranslator(HTMLTranslator): """ Docutils translator subclass to customize the generation of HTML from reST-formatted docstrings """ def __init__(self, document): super().__init__(document) self.body_prefix = [] self.body_suffix = [] def visit_bullet_list(self, node): self.context.append((self.compact_simple, self.compact_p)) self.compact_p = None self.compact_simple = self.is_compactable(node) self.body.append(self.starttag(node, 'ul', CLASS='docstring')) DOCSTRING_WRITER = Writer() DOCSTRING_WRITER.translator_class = NoHeaderHTMLTranslator @register.filter def safe_docstring_display(docstring): """ Utility function to htmlize reST-formatted documentation in browsable api. """ docstring = cleandoc(docstring) return publish_parts(docstring, writer=DOCSTRING_WRITER)['html_body'] @register.filter def urlize_links_and_mails(text): """Utility function for decorating api links in browsable api. 
Args: text: whose content matching links should be transformed into contextual API or Browse html links. Returns The text transformed if any link is found. The text as is otherwise. """ try: if 'href="' not in text: - text = re.sub(r'(/api/[^"<]*|/browse/[^"<]*|http.*$)', - r'\1', - text) + text = re.sub(r'(http.*)', r'\1', text) return re.sub(r'([^ <>"]+@[^ <>"]+)', - r'\1', - text) + r'\1', text) except Exception as exc: sentry_sdk.capture_exception(exc) return text @register.filter def urlize_header_links(text): """Utility function for decorating headers links in browsable api. Args text: Text whose content contains Link header value Returns: The text transformed with html link if any link is found. The text as is otherwise. """ links = text.split(',') ret = '' for i, link in enumerate(links): - ret += re.sub(r'<(/api/.*|/browse/.*)>', r'<\1>', - link) + ret += re.sub(r'<(http.*)>', r'<\1>', link) # add one link per line and align them if i != len(links) - 1: ret += '\n ' return ret @register.filter def jsonify(obj): """Utility function for converting a django template variable to JSON in order to use it in script tags. Args obj: Any django template context variable Returns: JSON representation of the variable. """ return mark_safe(json.dumps(obj, cls=DjangoJSONEncoder)) @register.filter def sub(value, arg): """Django template filter for subtracting two numbers Args: value (int/float): the value to subtract from arg (int/float): the value to subtract to Returns: int/float: The subtraction result """ return value - arg @register.filter def mul(value, arg): """Django template filter for multiplying two numbers Args: value (int/float): the value to multiply from arg (int/float): the value to multiply with Returns: int/float: The multiplication result """ return value * arg @register.filter def key_value(dict, key): """Django template filter to get a value in a dictionary. 
Args: dict (dict): a dictionary key (str): the key to lookup value Returns: The requested value in the dictionary """ return dict[key] @register.filter def visit_type_savable(visit_type): """Django template filter to check if a save request can be created for a given visit type. Args: visit_type (str): the type of visit Returns: If the visit type is saveable or not """ return visit_type in get_savable_visit_types() @register.filter def split(value, arg): """Django template filter to split a string. Args: value (str): the string to split arg (str): the split separator Returns: list: the split string parts """ return value.split(arg) diff --git a/swh/web/common/utils.py b/swh/web/common/utils.py index 52f0f1bf..0780f9a8 100644 --- a/swh/web/common/utils.py +++ b/swh/web/common/utils.py @@ -1,346 +1,384 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import docutils.parsers.rst import docutils.utils import re from datetime import datetime, timezone from dateutil import parser as date_parser from dateutil import tz +from typing import Optional, Dict, Any + from django.urls import reverse as django_reverse -from django.http import QueryDict +from django.http import QueryDict, HttpRequest from prometheus_client.registry import CollectorRegistry from rest_framework.authentication import SessionAuthentication from swh.model.exceptions import ValidationError from swh.model.identifiers import ( persistent_identifier, parse_persistent_identifier, CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT ) from swh.web.common.exc import BadInputExc from swh.web.config import get_config + SWH_WEB_METRICS_REGISTRY = CollectorRegistry(auto_describe=True) swh_object_icons = { 'branch': 'fa fa-code-fork', 'branches': 'fa fa-code-fork', 'content': 'fa fa-file-text', 'directory': 'fa 
fa-folder', 'person': 'fa fa-user', 'revisions history': 'fa fa-history', 'release': 'fa fa-tag', 'releases': 'fa fa-tag', 'revision': 'octicon-git-commit', 'snapshot': 'fa fa-camera', 'visits': 'fa fa-calendar', } -def reverse(viewname, url_args=None, query_params=None, - current_app=None, urlconf=None): +def reverse(viewname: str, + url_args: Optional[Dict[str, Any]] = None, + query_params: Optional[Dict[str, Any]] = None, + current_app: Optional[str] = None, + urlconf: Optional[str] = None, + request: Optional[HttpRequest] = None) -> str: """An override of django reverse function supporting query parameters. Args: - viewname (str): the name of the django view from which to compute a url - url_args (dict): dictionary of url arguments indexed by their names - query_params (dict): dictionary of query parameters to append to the + viewname: the name of the django view from which to compute a url + url_args: dictionary of url arguments indexed by their names + query_params: dictionary of query parameters to append to the reversed url - current_app (str): the name of the django app tighten to the view - urlconf (str): url configuration module + current_app: the name of the django app tighten to the view + urlconf: url configuration module + request: build an absolute URI if provided Returns: str: the url of the requested view with processed arguments and query parameters """ if url_args: url_args = {k: v for k, v in url_args.items() if v is not None} url = django_reverse(viewname, urlconf=urlconf, kwargs=url_args, current_app=current_app) if query_params: query_params = {k: v for k, v in query_params.items() if v} if query_params and len(query_params) > 0: query_dict = QueryDict('', mutable=True) for k in sorted(query_params.keys()): query_dict[k] = query_params[k] url += ('?' 
+ query_dict.urlencode(safe='/;:')) + if request is not None: + url = request.build_absolute_uri(url) + return url def datetime_to_utc(date): """Returns datetime in UTC without timezone info Args: date (datetime.datetime): input datetime with timezone info Returns: datetime.datetime: datetime in UTC without timezone info """ if date.tzinfo: return date.astimezone(tz.gettz('UTC')).replace(tzinfo=timezone.utc) else: return date def parse_timestamp(timestamp): """Given a time or timestamp (as string), parse the result as UTC datetime. Returns: datetime.datetime: a timezone-aware datetime representing the parsed value or None if the parsing fails. Samples: - 2016-01-12 - 2016-01-12T09:19:12+0100 - Today is January 1, 2047 at 8:21:00AM - 1452591542 """ if not timestamp: return None try: date = date_parser.parse(timestamp, ignoretz=False, fuzzy=True) return datetime_to_utc(date) except Exception: try: return datetime.utcfromtimestamp(float(timestamp)).replace( tzinfo=timezone.utc) except (ValueError, OverflowError) as e: raise BadInputExc(e) def shorten_path(path): """Shorten the given path: for each hash present, only return the first 8 characters followed by an ellipsis""" sha256_re = r'([0-9a-f]{8})[0-9a-z]{56}' sha1_re = r'([0-9a-f]{8})[0-9a-f]{32}' ret = re.sub(sha256_re, r'\1...', path) return re.sub(sha1_re, r'\1...', ret) def format_utc_iso_date(iso_date, fmt='%d %B %Y, %H:%M UTC'): """Turns a string representation of an ISO 8601 date string to UTC and format it into a more human readable one. For instance, from the following input string: '2017-05-04T13:27:13+02:00' the following one is returned: '04 May 2017, 11:27 UTC'. 
Custom format string may also be provided as parameter Args: iso_date (str): a string representation of an ISO 8601 date fmt (str): optional date formatting string Returns: str: a formatted string representation of the input iso date """ if not iso_date: return iso_date date = parse_timestamp(iso_date) return date.strftime(fmt) def gen_path_info(path): """Function to generate path data navigation for use with a breadcrumb in the swh web ui. For instance, from a path /folder1/folder2/folder3, it returns the following list:: [{'name': 'folder1', 'path': 'folder1'}, {'name': 'folder2', 'path': 'folder1/folder2'}, {'name': 'folder3', 'path': 'folder1/folder2/folder3'}] Args: path: a filesystem path Returns: list: a list of path data for navigation as illustrated above. """ path_info = [] if path: sub_paths = path.strip('/').split('/') path_from_root = '' for p in sub_paths: path_from_root += '/' + p path_info.append({'name': p, 'path': path_from_root.strip('/')}) return path_info def get_swh_persistent_id(object_type, object_id, scheme_version=1): """ Returns the persistent identifier for a swh object based on: * the object type * the object id * the swh identifiers scheme version Args: object_type (str): the swh object type (content/directory/release/revision/snapshot) object_id (str): the swh object id (hexadecimal representation of its hash value) scheme_version (int): the scheme version of the swh persistent identifiers Returns: str: the swh object persistent identifier Raises: BadInputExc: if the provided parameters do not enable to generate a valid identifier """ try: swh_id = persistent_identifier(object_type, object_id, scheme_version) except ValidationError as e: raise BadInputExc('Invalid object (%s) for swh persistent id. %s' % (object_id, e)) else: return swh_id def resolve_swh_persistent_id(swh_id, query_params=None): """ Try to resolve a Software Heritage persistent id into an url for browsing the pointed object. 
Args: swh_id (str): a Software Heritage persistent identifier query_params (django.http.QueryDict): optional dict filled with query parameters to append to the browse url Returns: dict: a dict with the following keys: * **swh_id_parsed (swh.model.identifiers.PersistentId)**: the parsed identifier * **browse_url (str)**: the url for browsing the pointed object Raises: BadInputExc: if the provided identifier can not be parsed """ try: swh_id_parsed = parse_persistent_identifier(swh_id) object_type = swh_id_parsed.object_type object_id = swh_id_parsed.object_id browse_url = None query_dict = QueryDict('', mutable=True) if query_params and len(query_params) > 0: for k in sorted(query_params.keys()): query_dict[k] = query_params[k] if 'origin' in swh_id_parsed.metadata: query_dict['origin'] = swh_id_parsed.metadata['origin'] if object_type == CONTENT: query_string = 'sha1_git:' + object_id fragment = '' if 'lines' in swh_id_parsed.metadata: lines = swh_id_parsed.metadata['lines'].split('-') fragment += '#L' + lines[0] if len(lines) > 1: fragment += '-L' + lines[1] browse_url = reverse('browse-content', url_args={'query_string': query_string}, query_params=query_dict) + fragment elif object_type == DIRECTORY: browse_url = reverse('browse-directory', url_args={'sha1_git': object_id}, query_params=query_dict) elif object_type == RELEASE: browse_url = reverse('browse-release', url_args={'sha1_git': object_id}, query_params=query_dict) elif object_type == REVISION: browse_url = reverse('browse-revision', url_args={'sha1_git': object_id}, query_params=query_dict) elif object_type == SNAPSHOT: browse_url = reverse('browse-snapshot', url_args={'snapshot_id': object_id}, query_params=query_dict) except ValidationError as ve: raise BadInputExc('Error when parsing identifier. %s' % ' '.join(ve.messages)) else: return {'swh_id_parsed': swh_id_parsed, 'browse_url': browse_url} def parse_rst(text, report_level=2): """ Parse a reStructuredText string with docutils. 
Args: text (str): string with reStructuredText markups in it report_level (int): level of docutils report messages to print (1 info 2 warning 3 error 4 severe 5 none) Returns: docutils.nodes.document: a parsed docutils document """ parser = docutils.parsers.rst.Parser() components = (docutils.parsers.rst.Parser,) settings = docutils.frontend.OptionParser( components=components).get_default_values() settings.report_level = report_level document = docutils.utils.new_document('rst-doc', settings=settings) parser.parse(text, document) return document def get_client_ip(request): """ Return the client IP address from an incoming HTTP request. Args: request (django.http.HttpRequest): the incoming HTTP request Returns: str: The client IP address """ x_forwarded_for = request.META.get('HTTP_X_FORWARDED_FOR') if x_forwarded_for: ip = x_forwarded_for.split(',')[0] else: ip = request.META.get('REMOTE_ADDR') return ip def context_processor(request): """ Django context processor used to inject variables in all swh-web templates. """ return { 'swh_object_icons': swh_object_icons, 'available_languages': None, 'swh_client_config': get_config()['client_config'], } class EnforceCSRFAuthentication(SessionAuthentication): """ Helper class to enforce CSRF validation on a DRF view when a user is not authenticated. """ def authenticate(self, request): user = getattr(request._request, 'user', None) self.enforce_csrf(request) return (user, None) + + +def resolve_branch_alias(snapshot: Dict[str, Any], + branch: Optional[Dict[str, Any]] + ) -> Optional[Dict[str, Any]]: + """ + Resolve branch alias in snapshot content. + + Args: + snapshot: a full snapshot content + branch: a branch alias contained in the snapshot + Returns: + The real snapshot branch that got aliased. 
+ """ + while branch and branch['target_type'] == 'alias': + if branch['target'] in snapshot['branches']: + branch = snapshot['branches'][branch['target']] + else: + from swh.web.common import service + snp = service.lookup_snapshot( + snapshot['id'], branches_from=branch['target'], + branches_count=1) + if snp and branch['target'] in snp['branches']: + branch = snp['branches'][branch['target']] + else: + branch = None + return branch diff --git a/swh/web/templates/api/apidoc.html b/swh/web/templates/api/apidoc.html index f377cb00..b5a7ef35 100644 --- a/swh/web/templates/api/apidoc.html +++ b/swh/web/templates/api/apidoc.html @@ -1,183 +1,183 @@ {% extends "layout.html" %} {% comment %} Copyright (C) 2015-2019 The Software Heritage developers See the AUTHORS file at the top-level directory of this distribution License: GNU Affero General Public License version 3, or any later version See top-level LICENSE file for more information {% endcomment %} {% load swh_templatetags %} {% block title %}{{ heading }} – Software Heritage API {% endblock %} {% block navbar-content %} {% endblock %} {% block content %}
{% if description %}

Description

{{ description | safe_docstring_display | safe }}
{% endif %} {% if response_data is not None %}

Request

-
{{ request.method }} {{ request.path }}
+
{{ request.method }} {{ request.build_absolute_uri }}

Response

{% if status_code != 200 %}
Status Code
{{ status_code }}
{% endif %} {% if headers_data %}
Headers
{% for header_name, header_value in headers_data.items %}
{{ header_name }} {{ header_value | urlize_header_links | safe }}
{% endfor %} {% endif %}
Body
{{ response_data | urlize_links_and_mails | safe }}
{% endif %}
{% if urls and urls|length > 0 %}
{% for url in urls %} {% endfor %}
URL Allowed Methods
{{ url.rule | safe_docstring_display | safe }} {{ url.methods | dictsort:0 | join:', ' }}

{% endif %} {% if args and args|length > 0 %}

Arguments

{% for arg in args %}
{{ arg.name }} ({{ arg.type }})
{{ arg.doc | safe_docstring_display | safe }}
{% endfor %}

{% endif %} {% if params and params|length > 0 %}

Query parameters

{% for param in params %}
{{ param.name }} ({{ param.type }})
{{ param.doc | safe_docstring_display | safe }}
{% endfor %}

{% endif %} {% if reqheaders and reqheaders|length > 0 %}

Request headers

{% for header in reqheaders %}
{{ header.name }}
{{ header.doc | safe_docstring_display | safe }}
{% endfor %}

{% endif %} {% if resheaders and resheaders|length > 0 %}

Response headers

{% for header in resheaders %}
{{ header.name }}
{{ header.doc | safe_docstring_display | safe }}
{% endfor %}

{% endif %} {% if return_type %}

Returns

{{ return_type }}

{% if return_type == 'array' %} an array of objects containing the following keys: {% elif return_type == 'octet stream' %} the raw data as an octet stream {% else %} an object containing the following keys: {% endif %} {{ returns_list | safe_docstring_display | safe }}


{% endif %} {% if status_codes and status_codes|length > 0 %}

HTTP status codes

{% for status in status_codes %}
{{ status.code }}
{{ status.doc | safe_docstring_display | safe }}
{% endfor %}

{% endif %} {% if examples and examples|length > 0 %}

Examples

{% for example in examples %}
{{ example }}
{% endfor %}
{% endif %}
{% endblock %} diff --git a/swh/web/templates/browse/person.html b/swh/web/templates/browse/person.html index 57e21bfb..cf84004b 100644 --- a/swh/web/templates/browse/person.html +++ b/swh/web/templates/browse/person.html @@ -1,36 +1,36 @@ {% extends "./browse.html" %} {% comment %} Copyright (C) 2017-2018 The Software Heritage developers See the AUTHORS file at the top-level directory of this distribution License: GNU Affero General Public License version 3, or any later version See top-level LICENSE file for more information {% endcomment %} {% load swh_templatetags %} {% block navbar-content %} {% if not snapshot_context %}

Browse person

{% else %} {{ block.super }} {% endif %} {% endblock %} {% block swh-browse-content %} {% for key, val in swh_object_metadata.items|dictsort:"0.lower" %} {% endfor %}
{% endblock %} \ No newline at end of file diff --git a/swh/web/tests/api/test_api_lookup.py b/swh/web/tests/api/test_api_lookup.py index 10b0c20d..6c678078 100644 --- a/swh/web/tests/api/test_api_lookup.py +++ b/swh/web/tests/api/test_api_lookup.py @@ -1,115 +1,115 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from swh.web.common.exc import NotFoundExc from swh.web.api.views import utils def test_genericapi_lookup_nothing_is_found(): def test_generic_lookup_fn(sha1, another_unused_arg): assert another_unused_arg == 'unused_arg' assert sha1 == 'sha1' return None notfound_msg = 'This will be raised because None is returned.' with pytest.raises(NotFoundExc) as e: utils.api_lookup( test_generic_lookup_fn, 'sha1', 'unused_arg', notfound_msg=notfound_msg) assert e.match(notfound_msg) def test_generic_api_map_are_enriched_and_transformed_to_list(): def test_generic_lookup_fn_1(criteria0, param0, param1): assert criteria0 == 'something' return map(lambda x: x + 1, [1, 2, 3]) actual_result = utils.api_lookup( test_generic_lookup_fn_1, 'something', 'some param 0', 'some param 1', notfound_msg=('This is not the error message you are looking for. ' 'Move along.'), - enrich_fn=lambda x: x * 2) + enrich_fn=lambda x, request: x * 2) assert actual_result == [4, 6, 8] def test_generic_api_list_are_enriched_too(): def test_generic_lookup_fn_2(crit): assert crit == 'something' return ['a', 'b', 'c'] actual_result = utils.api_lookup( test_generic_lookup_fn_2, 'something', notfound_msg=('Not the error message you are looking for, it is. ' 'Along, you move!'), - enrich_fn=lambda x: ''. join(['=', x, '='])) + enrich_fn=lambda x, request: ''. 
join(['=', x, '='])) assert actual_result == ['=a=', '=b=', '=c='] def test_generic_api_generator_are_enriched_and_returned_as_list(): def test_generic_lookup_fn_3(crit): assert crit == 'crit' return (i for i in [4, 5, 6]) actual_result = utils.api_lookup( test_generic_lookup_fn_3, 'crit', notfound_msg='Move!', - enrich_fn=lambda x: x - 1) + enrich_fn=lambda x, request: x - 1) assert actual_result == [3, 4, 5] def test_generic_api_simple_data_are_enriched_and_returned_too(): def test_generic_lookup_fn_4(crit): assert crit == '123' return {'a': 10} - def test_enrich_data(x): + def test_enrich_data(x, request): x['a'] = x['a'] * 10 return x actual_result = utils.api_lookup( test_generic_lookup_fn_4, '123', notfound_msg='Nothing to do', enrich_fn=test_enrich_data) assert actual_result == {'a': 100} def test_api_lookup_not_found(): notfound_msg = 'this is the error message raised as it is None' with pytest.raises(NotFoundExc) as e: utils.api_lookup( lambda x: None, 'something', notfound_msg=notfound_msg) assert e.match(notfound_msg) def test_api_lookup_with_result(): actual_result = utils.api_lookup( lambda x: x + '!', 'something', notfound_msg='this is the error which won\'t be used here') assert actual_result == 'something!' 
def test_api_lookup_with_result_as_map(): actual_result = utils.api_lookup( lambda x: map(lambda y: y+1, x), [1, 2, 3], notfound_msg='this is the error which won\'t be used here') assert actual_result == [2, 3, 4] diff --git a/swh/web/tests/api/test_apiresponse.py b/swh/web/tests/api/test_apiresponse.py index 6ec7ecd0..c82f97b0 100644 --- a/swh/web/tests/api/test_apiresponse.py +++ b/swh/web/tests/api/test_apiresponse.py @@ -1,159 +1,144 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import json from swh.web.api.apiresponse import ( compute_link_header, transform, make_api_response, filter_by_fields ) -def test_compute_link_header(api_request_factory): +def test_compute_link_header(): next_link = '/api/endpoint/next' prev_link = '/api/endpoint/prev' rv = { 'headers': {'link-next': next_link, 'link-prev': prev_link}, 'results': [1, 2, 3] } options = {} - request = api_request_factory.get('/api/endpoint/') - - headers = compute_link_header(request, rv, options) + headers = compute_link_header(rv, options) assert headers == { - 'Link': (f'<{request.build_absolute_uri(next_link)}>; rel="next",' - f'<{request.build_absolute_uri(prev_link)}>; rel="previous"') + 'Link': (f'<{next_link}>; rel="next",' + f'<{prev_link}>; rel="previous"') } -def test_compute_link_header_nothing_changed(api_request_factory): +def test_compute_link_header_nothing_changed(): rv = {} options = {} - request = api_request_factory.get('/api/test/path/') - - headers = compute_link_header(request, rv, options) + headers = compute_link_header(rv, options) assert headers == {} -def test_compute_link_header_nothing_changed_2(api_request_factory): +def test_compute_link_header_nothing_changed_2(): rv = {'headers': {}} options = {} - request = api_request_factory.get('/api/test/path/') - - 
headers = compute_link_header(request, rv, options) + headers = compute_link_header(rv, options) assert headers == {} def test_transform_only_return_results_1(): rv = {'results': {'some-key': 'some-value'}} - assert transform(rv) == {'some-key': 'some-value'} def test_transform_only_return_results_2(): rv = {'headers': {'something': 'do changes'}, 'results': {'some-key': 'some-value'}} - assert transform(rv) == {'some-key': 'some-value'} def test_transform_do_remove_headers(): rv = {'headers': {'something': 'do changes'}, 'some-key': 'some-value'} - assert transform(rv) == {'some-key': 'some-value'} def test_transform_do_nothing(): rv = {'some-key': 'some-value'} - assert transform(rv) == {'some-key': 'some-value'} def test_swh_multi_response_mimetype(mocker, api_request_factory): mock_shorten_path = mocker.patch('swh.web.api.apiresponse.shorten_path') mock_filter = mocker.patch('swh.web.api.apiresponse.filter_by_fields') mock_json = mocker.patch('swh.web.api.apiresponse.json') data = { 'data': [12, 34], 'id': 'adc83b19e793491b1c6ea0fd8b46cd9f32e592fc' } mock_filter.return_value = data mock_shorten_path.return_value = 'my_short_path' accepted_response_formats = {'html': 'text/html', 'yaml': 'application/yaml', 'json': 'application/json'} for format in accepted_response_formats: request = api_request_factory.get('/api/test/path/') mime_type = accepted_response_formats[format] setattr(request, 'accepted_media_type', mime_type) if mime_type == 'text/html': expected_data = { 'response_data': json.dumps(data), - 'request': { - 'path': request.path, - 'method': request.method, - 'absolute_uri': request.build_absolute_uri() - }, 'headers_data': {}, 'heading': 'my_short_path', 'status_code': 200 } mock_json.dumps.return_value = json.dumps(data) else: expected_data = data rv = make_api_response(request, data) mock_filter.assert_called_with(request, data) assert rv.status_code == 200, rv.data assert rv.data == expected_data if mime_type == 'text/html': assert 
rv.template_name == 'api/apidoc.html' def test_swh_filter_renderer_do_nothing(api_request_factory): input_data = {'a': 'some-data'} request = api_request_factory.get('/api/test/path/', data={}) setattr(request, 'query_params', request.GET) actual_data = filter_by_fields(request, input_data) assert actual_data == input_data def test_swh_filter_renderer_do_filter(mocker, api_request_factory): mock_ffk = mocker.patch('swh.web.api.apiresponse.utils.filter_field_keys') mock_ffk.return_value = {'a': 'some-data'} request = api_request_factory.get('/api/test/path/', data={'fields': 'a,c'}) setattr(request, 'query_params', request.GET) input_data = {'a': 'some-data', 'b': 'some-other-data'} actual_data = filter_by_fields(request, input_data) assert actual_data == {'a': 'some-data'} mock_ffk.assert_called_once_with(input_data, {'a', 'c'}) diff --git a/swh/web/tests/api/test_utils.py b/swh/web/tests/api/test_utils.py index 6c3c07d7..5a906029 100644 --- a/swh/web/tests/api/test_utils.py +++ b/swh/web/tests/api/test_utils.py @@ -1,545 +1,601 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information +import random + +from hypothesis import given + +from swh.model.hashutil import DEFAULT_ALGORITHMS + from swh.web.api import utils +from swh.web.common.origin_visits import get_origin_visits +from swh.web.common.utils import reverse, resolve_branch_alias +from swh.web.tests.strategies import ( + release, directory, content, revision, snapshot, origin +) url_map = [ { 'rule': '/other/', 'methods': set(['GET', 'POST', 'HEAD']), 'endpoint': 'foo' }, { 'rule': '/some/old/url/', 'methods': set(['GET', 'POST']), 'endpoint': 'blablafn' }, { 'rule': '/other/old/url/', 'methods': set(['GET', 'HEAD']), 'endpoint': 'bar' }, { 'rule': '/other', 'methods': set([]), 'endpoint': None }, { 'rule': 
'/other2', 'methods': set([]), 'endpoint': None } ] -sample_content_hashes = { - 'blake2s256': ('791e07fcea240ade6dccd0a9309141673' - 'c31242cae9c237cf3855e151abc78e9'), - 'sha1': 'dc2830a9e72f23c1dfebef4413003221baa5fb62', - 'sha1_git': 'fe95a46679d128ff167b7c55df5d02356c5a1ae1', - 'sha256': ('b5c7fe0536f44ef60c8780b6065d30bca74a5cd06' - 'd78a4a71ba1ad064770f0c9') -} - def test_filter_field_keys_dict_unknown_keys(): actual_res = utils.filter_field_keys( {'directory': 1, 'file': 2, 'link': 3}, {'directory1', 'file2'}) assert actual_res == {} def test_filter_field_keys_dict(): actual_res = utils.filter_field_keys( {'directory': 1, 'file': 2, 'link': 3}, {'directory', 'link'}) assert actual_res == {'directory': 1, 'link': 3} def test_filter_field_keys_list_unknown_keys(): actual_res = utils.filter_field_keys( [{'directory': 1, 'file': 2, 'link': 3}, {'1': 1, '2': 2, 'link': 3}], {'d'}) assert actual_res == [{}, {}] def test_filter_field_keys_map(): actual_res = utils.filter_field_keys( map(lambda x: {'i': x['i']+1, 'j': x['j']}, [{'i': 1, 'j': None}, {'i': 2, 'j': None}, {'i': 3, 'j': None}]), {'i'}) assert list(actual_res) == [{'i': 2}, {'i': 3}, {'i': 4}] def test_filter_field_keys_list(): actual_res = utils.filter_field_keys( [{'directory': 1, 'file': 2, 'link': 3}, {'dir': 1, 'fil': 2, 'lin': 3}], {'directory', 'dir'}) assert actual_res == [{'directory': 1}, {'dir': 1}] def test_filter_field_keys_other(): input_set = {1, 2} actual_res = utils.filter_field_keys(input_set, {'a', '1'}) assert actual_res == input_set def test_person_to_string(): assert utils.person_to_string({'name': 'raboof', 'email': 'foo@bar'}) == 'raboof ' -def test_enrich_release_0(): +def test_enrich_release_empty(): actual_release = utils.enrich_release({}) assert actual_release == {} -def test_enrich_release_1(mocker): - - mock_django_reverse = mocker.patch('swh.web.api.utils.reverse') - - def reverse_test_context(view_name, url_args): - if view_name == 'api-1-content': - id = url_args['q'] - 
return '/api/1/content/%s/' % id - else: - raise ValueError('This should not happened so fail if it does.') - - mock_django_reverse.side_effect = reverse_test_context - - actual_release = utils.enrich_release({ - 'target': '123', - 'target_type': 'content', - 'author': { - 'id': 100, - 'name': 'author release name', - 'email': 'author@email', - }, - }) - - assert actual_release == { - 'target': '123', - 'target_type': 'content', - 'target_url': '/api/1/content/sha1_git:123/', - 'author': { - 'id': 100, - 'name': 'author release name', - 'email': 'author@email', - }, - } +@given(release()) +def test_enrich_release_content_target(api_request_factory, + archive_data, release): - mock_django_reverse.assert_has_calls([ - mocker.call('api-1-content', url_args={'q': 'sha1_git:123'}), - ]) + release_data = archive_data.release_get(release) + release_data['target_type'] = 'content' + url = reverse('api-1-release', url_args={'sha1_git': release}) + request = api_request_factory.get(url) -def test_enrich_release_2(mocker): - mock_django_reverse = mocker.patch('swh.web.api.utils.reverse') - mock_django_reverse.return_value = '/api/1/dir/23/' + actual_release = utils.enrich_release(release_data, request) - actual_release = utils.enrich_release({'target': '23', - 'target_type': 'directory'}) + release_data['target_url'] = reverse( + 'api-1-content', + url_args={'q': f'sha1_git:{release_data["target"]}'}, + request=request) - assert actual_release == { - 'target': '23', - 'target_type': 'directory', - 'target_url': '/api/1/dir/23/' - } + assert actual_release == release_data - mock_django_reverse.assert_called_once_with('api-1-directory', - url_args={'sha1_git': '23'}) +@given(release()) +def test_enrich_release_directory_target(api_request_factory, + archive_data, release): -def test_enrich_release_3(mocker): - mock_django_reverse = mocker.patch('swh.web.api.utils.reverse') - mock_django_reverse.return_value = '/api/1/rev/3/' + release_data = archive_data.release_get(release) + 
release_data['target_type'] = 'directory' - actual_release = utils.enrich_release({'target': '3', - 'target_type': 'revision'}) + url = reverse('api-1-release', url_args={'sha1_git': release}) + request = api_request_factory.get(url) - assert actual_release == { - 'target': '3', - 'target_type': 'revision', - 'target_url': '/api/1/rev/3/' - } + actual_release = utils.enrich_release(release_data, request) - mock_django_reverse.assert_called_once_with('api-1-revision', - url_args={'sha1_git': '3'}) + release_data['target_url'] = reverse( + 'api-1-directory', + url_args={'sha1_git': release_data['target']}, + request=request) + assert actual_release == release_data -def test_enrich_release_4(mocker): - mock_django_reverse = mocker.patch('swh.web.api.utils.reverse') - mock_django_reverse.return_value = '/api/1/rev/4/' - actual_release = utils.enrich_release({'target': '4', - 'target_type': 'release'}) +@given(release()) +def test_enrich_release_revision_target(api_request_factory, + archive_data, release): - assert actual_release == { - 'target': '4', - 'target_type': 'release', - 'target_url': '/api/1/rev/4/' - } + release_data = archive_data.release_get(release) + release_data['target_type'] = 'revision' + + url = reverse('api-1-release', url_args={'sha1_git': release}) + request = api_request_factory.get(url) + + actual_release = utils.enrich_release(release_data, request) + + release_data['target_url'] = reverse( + 'api-1-revision', + url_args={'sha1_git': release_data['target']}, + request=request) - mock_django_reverse.assert_called_once_with('api-1-release', - url_args={'sha1_git': '4'}) + assert actual_release == release_data -def test_enrich_directory_no_type(mocker): - mock_django_reverse = mocker.patch('swh.web.api.utils.reverse') +@given(release()) +def test_enrich_release_release_target(api_request_factory, + archive_data, release): + + release_data = archive_data.release_get(release) + release_data['target_type'] = 'release' + + url = 
reverse('api-1-release', url_args={'sha1_git': release}) + request = api_request_factory.get(url) + + actual_release = utils.enrich_release(release_data, request) + + release_data['target_url'] = reverse( + 'api-1-release', + url_args={'sha1_git': release_data['target']}, + request=request) + + assert actual_release == release_data + + +def test_enrich_directory_no_type(): assert utils.enrich_directory({'id': 'dir-id'}) == {'id': 'dir-id'} - mock_django_reverse.return_value = '/api/content/sha1_git:123/' - actual_directory = utils.enrich_directory({ - 'id': 'dir-id', - 'type': 'file', - 'target': '123', - }) +@given(directory()) +def test_enrich_directory_with_type(api_request_factory, + archive_data, directory): - assert actual_directory == { - 'id': 'dir-id', - 'type': 'file', - 'target': '123', - 'target_url': '/api/content/sha1_git:123/', - } + dir_content = archive_data.directory_ls(directory) - mock_django_reverse.assert_called_once_with( - 'api-1-content', url_args={'q': 'sha1_git:123'}) - - -def test_enrich_directory_with_context_and_type_file(mocker): - mock_django_reverse = mocker.patch('swh.web.api.utils.reverse') - mock_django_reverse.return_value = '/api/content/sha1_git:123/' - - actual_directory = utils.enrich_directory({ - 'id': 'dir-id', - 'type': 'file', - 'name': 'hy', - 'target': '789', - }, context_url='/api/revision/revsha1/directory/prefix/path/') - - assert actual_directory == { - 'id': 'dir-id', - 'type': 'file', - 'name': 'hy', - 'target': '789', - 'target_url': '/api/content/sha1_git:123/', - 'file_url': '/api/revision/revsha1/directory' - '/prefix/path/hy/' - } + dir_entry = random.choice(dir_content) - mock_django_reverse.assert_called_once_with( - 'api-1-content', url_args={'q': 'sha1_git:789'}) - - -def test_enrich_directory_with_context_and_type_dir(mocker): - mock_django_reverse = mocker.patch('swh.web.api.utils.reverse') - mock_django_reverse.return_value = '/api/directory/456/' - - actual_directory = utils.enrich_directory({ - 
'id': 'dir-id', - 'type': 'dir', - 'name': 'emacs-42', - 'target_type': 'file', - 'target': '456', - }, context_url='/api/revision/origin/2/directory/some/prefix/path/') - - assert actual_directory == { - 'id': 'dir-id', - 'type': 'dir', - 'target_type': 'file', - 'name': 'emacs-42', - 'target': '456', - 'target_url': '/api/directory/456/', - 'dir_url': '/api/revision/origin/2/directory' - '/some/prefix/path/emacs-42/' - } + url = reverse('api-1-directory', url_args={'sha1_git': directory}) + request = api_request_factory.get(url) - mock_django_reverse.assert_called_once_with('api-1-directory', - url_args={'sha1_git': '456'}) + actual_directory = utils.enrich_directory(dir_entry, request) + + if dir_entry['type'] == 'file': + dir_entry['target_url'] = reverse( + 'api-1-content', + url_args={'q': f'sha1_git:{dir_entry["target"]}'}, + request=request) + + elif dir_entry['type'] == 'dir': + dir_entry['target_url'] = reverse( + 'api-1-directory', + url_args={'sha1_git': dir_entry['target']}, + request=request) + + elif dir_entry['type'] == 'rev': + dir_entry['target_url'] = reverse( + 'api-1-revision', + url_args={'sha1_git': dir_entry['target']}, + request=request) + + assert actual_directory == dir_entry def test_enrich_content_without_hashes(): assert utils.enrich_content({'id': '123'}) == {'id': '123'} -def test_enrich_content_with_hashes(mocker): - mock_django_reverse = mocker.patch('swh.web.api.utils.reverse') - for algo, hash in sample_content_hashes.items(): - - query_string = '%s:%s' % (algo, hash) - - mock_django_reverse.side_effect = [ - '/api/content/%s/raw/' % query_string, - '/api/filetype/%s/' % query_string, - '/api/language/%s/' % query_string, - '/api/license/%s/' % query_string - ] - - enriched_content = utils.enrich_content({algo: hash}, - query_string=query_string) - - assert enriched_content == { - algo: hash, - 'data_url': '/api/content/%s/raw/' % query_string, - 'filetype_url': '/api/filetype/%s/' % query_string, - 'language_url': 
'/api/language/%s/' % query_string, - 'license_url': '/api/license/%s/' % query_string, - } - - mock_django_reverse.assert_has_calls([ - mocker.call('api-1-content-raw', url_args={'q': query_string}), - mocker.call('api-1-content-filetype', - url_args={'q': query_string}), - mocker.call('api-1-content-language', - url_args={'q': query_string}), - mocker.call('api-1-content-license', - url_args={'q': query_string}), - ]) - - mock_django_reverse.reset() - - -def test_enrich_content_with_hashes_and_top_level_url(mocker): - mock_django_reverse = mocker.patch('swh.web.api.utils.reverse') - for algo, hash in sample_content_hashes.items(): - - query_string = '%s:%s' % (algo, hash) - - mock_django_reverse.side_effect = [ - '/api/content/%s/' % query_string, - '/api/content/%s/raw/' % query_string, - '/api/filetype/%s/' % query_string, - '/api/language/%s/' % query_string, - '/api/license/%s/' % query_string, - ] - - enriched_content = utils.enrich_content({algo: hash}, top_url=True, - query_string=query_string) - - assert enriched_content == { - algo: hash, - 'content_url': '/api/content/%s/' % query_string, - 'data_url': '/api/content/%s/raw/' % query_string, - 'filetype_url': '/api/filetype/%s/' % query_string, - 'language_url': '/api/language/%s/' % query_string, - 'license_url': '/api/license/%s/' % query_string, - } - - mock_django_reverse.assert_has_calls([ - mocker.call('api-1-content', url_args={'q': query_string}), - mocker.call('api-1-content-raw', url_args={'q': query_string}), - mocker.call('api-1-content-filetype', - url_args={'q': query_string}), - mocker.call('api-1-content-language', - url_args={'q': query_string}), - mocker.call('api-1-content-license', url_args={'q': query_string}), - ]) - - mock_django_reverse.reset() - - -def _reverse_context_test(view_name, url_args): - if view_name == 'api-1-revision': - return '/api/revision/%s/' % url_args['sha1_git'] - elif view_name == 'api-1-revision-context': - return ('/api/revision/%s/prev/%s/' % - 
(url_args['sha1_git'], url_args['context'])) - elif view_name == 'api-1-revision-log': - if 'prev_sha1s' in url_args: - return ('/api/revision/%s/prev/%s/log/' % - (url_args['sha1_git'], url_args['prev_sha1s'])) - else: - return '/api/revision/%s/log/' % url_args['sha1_git'] - - -def test_enrich_revision_without_children_or_parent(mocker): - mock_django_reverse = mocker.patch('swh.web.api.utils.reverse') - - def reverse_test(view_name, url_args): - if view_name == 'api-1-revision': - return '/api/revision/' + url_args['sha1_git'] + '/' - elif view_name == 'api-1-revision-log': - return '/api/revision/' + url_args['sha1_git'] + '/log/' - elif view_name == 'api-1-directory': - return '/api/directory/' + url_args['sha1_git'] + '/' - - mock_django_reverse.side_effect = reverse_test - - actual_revision = utils.enrich_revision({ - 'id': 'rev-id', - 'directory': '123', - 'author': {'id': '1'}, - 'committer': {'id': '2'}, - }) - - expected_revision = { - 'id': 'rev-id', - 'directory': '123', - 'url': '/api/revision/rev-id/', - 'history_url': '/api/revision/rev-id/log/', - 'directory_url': '/api/directory/123/', - 'author': {'id': '1'}, - 'committer': {'id': '2'}, - } +@given(content()) +def test_enrich_content_with_hashes(api_request_factory, content): - assert actual_revision == expected_revision + for algo in DEFAULT_ALGORITHMS: - mock_django_reverse.assert_has_calls([ - mocker.call('api-1-revision', url_args={'sha1_git': 'rev-id'}), - mocker.call('api-1-revision-log', url_args={'sha1_git': 'rev-id'}), - mocker.call('api-1-directory', url_args={'sha1_git': '123'}) - ]) + content_data = dict(content) + query_string = '%s:%s' % (algo, content_data[algo]) -def test_enrich_revision_with_children_and_parent_no_dir(mocker): - mock_django_reverse = mocker.patch('swh.web.api.utils.reverse') - mock_django_reverse.side_effect = _reverse_context_test + url = reverse('api-1-content', url_args={'q': query_string}) + request = api_request_factory.get(url) - actual_revision = 
utils.enrich_revision({ - 'id': 'rev-id', - 'parents': ['123'], - 'children': ['456'], - }) + enriched_content = utils.enrich_content(content_data, + query_string=query_string, + request=request) - expected_revision = { - 'id': 'rev-id', - 'url': '/api/revision/rev-id/', - 'history_url': '/api/revision/rev-id/log/', - 'parents': [{'id': '123', 'url': '/api/revision/123/'}], - 'children': ['456'], - 'children_urls': ['/api/revision/456/'], - } + content_data['data_url'] = reverse('api-1-content-raw', + url_args={'q': query_string}, + request=request) - assert actual_revision == expected_revision - - mock_django_reverse.assert_has_calls([ - mocker.call('api-1-revision', url_args={'sha1_git': 'rev-id'}), - mocker.call('api-1-revision-log', url_args={'sha1_git': 'rev-id'}), - mocker.call('api-1-revision', url_args={'sha1_git': '123'}), - mocker.call('api-1-revision', url_args={'sha1_git': '456'}) - ]) - - -def test_enrich_revision_no_context(mocker): - mock_django_reverse = mocker.patch('swh.web.api.utils.reverse') - mock_django_reverse.side_effect = _reverse_context_test - - actual_revision = utils.enrich_revision({ - 'id': 'rev-id', - 'parents': ['123'], - 'children': ['456'], - }) - - expected_revision = { - 'id': 'rev-id', - 'url': '/api/revision/rev-id/', - 'history_url': '/api/revision/rev-id/log/', - 'parents': [{'id': '123', 'url': '/api/revision/123/'}], - 'children': ['456'], - 'children_urls': ['/api/revision/456/'] - } + content_data['filetype_url'] = reverse('api-1-content-filetype', + url_args={'q': query_string}, + request=request) - assert actual_revision == expected_revision - - mock_django_reverse.assert_has_calls([ - mocker.call('api-1-revision', url_args={'sha1_git': 'rev-id'}), - mocker.call('api-1-revision-log', url_args={'sha1_git': 'rev-id'}), - mocker.call('api-1-revision', url_args={'sha1_git': '123'}), - mocker.call('api-1-revision', url_args={'sha1_git': '456'}) - ]) - - -def _reverse_rev_message_test(view_name, url_args): - if view_name == 
'api-1-revision': - return '/api/revision/%s/' % url_args['sha1_git'] - elif view_name == 'api-1-revision-log': - if 'prev_sha1s' in url_args and url_args['prev_sha1s'] is not None: - return ('/api/revision/%s/prev/%s/log/' % - (url_args['sha1_git'], url_args['prev_sha1s'])) - else: - return '/api/revision/%s/log/' % url_args['sha1_git'] - elif view_name == 'api-1-revision-raw-message': - return '/api/revision/' + url_args['sha1_git'] + '/raw/' - else: - return ('/api/revision/%s/prev/%s/' % - (url_args['sha1_git'], url_args['context'])) - - -def test_enrich_revision_with_no_message(mocker): - mock_django_reverse = mocker.patch('swh.web.api.utils.reverse') - mock_django_reverse.side_effect = _reverse_rev_message_test - - expected_revision = { - 'id': 'rev-id', - 'url': '/api/revision/rev-id/', - 'history_url': '/api/revision/rev-id/log/', - 'message': None, - 'parents': [{'id': '123', 'url': '/api/revision/123/'}], - 'children': ['456'], - 'children_urls': ['/api/revision/456/'], - } + content_data['language_url'] = reverse('api-1-content-language', + url_args={'q': query_string}, + request=request) + + content_data['license_url'] = reverse('api-1-content-license', + url_args={'q': query_string}, + request=request) + + assert enriched_content == content_data + + +@given(content()) +def test_enrich_content_with_hashes_and_top_level_url(api_request_factory, + content): + + for algo in DEFAULT_ALGORITHMS: + + content_data = dict(content) + + query_string = '%s:%s' % (algo, content_data[algo]) + + url = reverse('api-1-content', url_args={'q': query_string}) + request = api_request_factory.get(url) + + enriched_content = utils.enrich_content(content_data, + query_string=query_string, + top_url=True, + request=request) + + content_data['content_url'] = reverse('api-1-content', + url_args={'q': query_string}, + request=request) + + content_data['data_url'] = reverse('api-1-content-raw', + url_args={'q': query_string}, + request=request) + + content_data['filetype_url'] = 
reverse('api-1-content-filetype', + url_args={'q': query_string}, + request=request) - actual_revision = utils.enrich_revision({ - 'id': 'rev-id', - 'message': None, - 'parents': ['123'], - 'children': ['456'], - }) - - assert actual_revision == expected_revision - - mock_django_reverse.assert_has_calls([ - mocker.call('api-1-revision', url_args={'sha1_git': 'rev-id'}), - mocker.call('api-1-revision-log', url_args={'sha1_git': 'rev-id'}), - mocker.call('api-1-revision', url_args={'sha1_git': '123'}), - mocker.call('api-1-revision', url_args={'sha1_git': '456'}) - ]) - - -def test_enrich_revision_with_invalid_message(mocker): - mock_django_reverse = mocker.patch('swh.web.api.utils.reverse') - mock_django_reverse.side_effect = _reverse_rev_message_test - - actual_revision = utils.enrich_revision({ - 'id': 'rev-id', - 'message': None, - 'message_decoding_failed': True, - 'parents': ['123'], - 'children': ['456'], - }) - - expected_revision = { - 'id': 'rev-id', - 'url': '/api/revision/rev-id/', - 'history_url': '/api/revision/rev-id/log/', - 'message': None, - 'message_decoding_failed': True, - 'message_url': '/api/revision/rev-id/raw/', - 'parents': [{'id': '123', 'url': '/api/revision/123/'}], - 'children': ['456'], - 'children_urls': ['/api/revision/456/'], + content_data['language_url'] = reverse('api-1-content-language', + url_args={'q': query_string}, + request=request) + + content_data['license_url'] = reverse('api-1-content-license', + url_args={'q': query_string}, + request=request) + + assert enriched_content == content_data + + +@given(revision()) +def test_enrich_revision_without_children_or_parent(api_request_factory, + archive_data, revision): + + revision_data = archive_data.revision_get(revision) + del revision_data['parents'] + + url = reverse('api-1-revision', url_args={'sha1_git': revision}) + request = api_request_factory.get(url) + + actual_revision = utils.enrich_revision(revision_data, request) + + revision_data['url'] = reverse( + 
'api-1-revision', + url_args={'sha1_git': revision}, + request=request) + + revision_data['history_url'] = reverse( + 'api-1-revision-log', + url_args={'sha1_git': revision}, + request=request) + + revision_data['directory_url'] = reverse( + 'api-1-directory', + url_args={'sha1_git': revision_data['directory']}, + request=request) + + assert actual_revision == revision_data + + +@given(revision(), revision(), revision()) +def test_enrich_revision_with_children_and_parent_no_dir(api_request_factory, + archive_data, + revision, + parent_revision, + child_revision): + + revision_data = archive_data.revision_get(revision) + del revision_data['directory'] + revision_data['parents'].append(parent_revision) + revision_data['children'] = [child_revision] + + url = reverse('api-1-revision', url_args={'sha1_git': revision}) + request = api_request_factory.get(url) + + actual_revision = utils.enrich_revision(revision_data, request) + + revision_data['url'] = reverse( + 'api-1-revision', + url_args={'sha1_git': revision}, + request=request) + + revision_data['history_url'] = reverse( + 'api-1-revision-log', + url_args={'sha1_git': revision}, + request=request) + + revision_data['parents'] = [ + {'id': p['id'], 'url': reverse('api-1-revision', + url_args={'sha1_git': p['id']}, + request=request)} + for p in revision_data['parents'] + ] + + revision_data['children_urls'] = [ + reverse('api-1-revision', + url_args={'sha1_git': child_revision}, + request=request) + ] + + assert actual_revision == revision_data + + +@given(revision(), revision(), revision()) +def test_enrich_revision_no_context(api_request_factory, + revision, + parent_revision, + child_revision): + + revision_data = { + 'id': revision, + 'parents': [parent_revision], + 'children': [child_revision] } - assert actual_revision == expected_revision + url = reverse('api-1-revision', url_args={'sha1_git': revision}) + request = api_request_factory.get(url) + + actual_revision = utils.enrich_revision(revision_data, 
request) + + revision_data['url'] = reverse( + 'api-1-revision', + url_args={'sha1_git': revision}, + request=request) + + revision_data['history_url'] = reverse( + 'api-1-revision-log', + url_args={'sha1_git': revision}, + request=request) + + revision_data['parents'] = [{ + 'id': parent_revision, + 'url': reverse('api-1-revision', + url_args={'sha1_git': parent_revision}, + request=request) + }] + + revision_data['children_urls'] = [ + reverse('api-1-revision', + url_args={'sha1_git': child_revision}, + request=request) + ] + + assert actual_revision == revision_data + + +@given(revision(), revision(), revision()) +def test_enrich_revision_with_no_message(api_request_factory, + archive_data, + revision, + parent_revision, + child_revision): + + revision_data = archive_data.revision_get(revision) + revision_data['message'] = None + revision_data['parents'].append(parent_revision) + revision_data['children'] = [child_revision] + + url = reverse('api-1-revision', url_args={'sha1_git': revision}) + request = api_request_factory.get(url) + + actual_revision = utils.enrich_revision(revision_data, request) + + revision_data['url'] = reverse( + 'api-1-revision', + url_args={'sha1_git': revision}, + request=request) + + revision_data['directory_url'] = reverse( + 'api-1-directory', + url_args={'sha1_git': revision_data['directory']}, + request=request) + + revision_data['history_url'] = reverse( + 'api-1-revision-log', + url_args={'sha1_git': revision}, + request=request) + + revision_data['parents'] = [ + {'id': p['id'], 'url': reverse('api-1-revision', + url_args={'sha1_git': p['id']}, + request=request)} + for p in revision_data['parents'] + ] + + revision_data['children_urls'] = [ + reverse('api-1-revision', + url_args={'sha1_git': child_revision}, + request=request) + ] + + assert actual_revision == revision_data + + +@given(revision(), revision(), revision()) +def test_enrich_revision_with_invalid_message(api_request_factory, + archive_data, + revision, + 
parent_revision, + child_revision): + + revision_data = archive_data.revision_get(revision) + revision_data['message'] = None + revision_data['message_decoding_failed'] = True + revision_data['parents'].append(parent_revision) + revision_data['children'] = [child_revision] + + url = reverse('api-1-revision', url_args={'sha1_git': revision}) + request = api_request_factory.get(url) + + actual_revision = utils.enrich_revision(revision_data, request) + + revision_data['url'] = reverse( + 'api-1-revision', + url_args={'sha1_git': revision}, + request=request) + + revision_data['message_url'] = reverse( + 'api-1-revision-raw-message', + url_args={'sha1_git': revision}, + request=request) + + revision_data['directory_url'] = reverse( + 'api-1-directory', + url_args={'sha1_git': revision_data['directory']}, + request=request) + + revision_data['history_url'] = reverse( + 'api-1-revision-log', + url_args={'sha1_git': revision}, + request=request) + + revision_data['parents'] = [ + {'id': p['id'], 'url': reverse('api-1-revision', + url_args={'sha1_git': p['id']}, + request=request)} + for p in revision_data['parents'] + ] + + revision_data['children_urls'] = [ + reverse('api-1-revision', + url_args={'sha1_git': child_revision}, + request=request) + ] + + assert actual_revision == revision_data + + +@given(snapshot()) +def test_enrich_snapshot(api_request_factory, archive_data, snapshot): + snapshot_data = archive_data.snapshot_get(snapshot) + + url = reverse('api-1-snapshot', url_args={'snapshot_id': snapshot}) + request = api_request_factory.get(url) + + actual_snapshot = utils.enrich_snapshot(snapshot_data, request) + + for _, b in snapshot_data['branches'].items(): + if b['target_type'] in ('directory', 'revision', 'release'): + b['target_url'] = reverse(f'api-1-{b["target_type"]}', + url_args={'sha1_git': b['target']}, + request=request) + elif b['target_type'] == 'content': + b['target_url'] = reverse( + 'api-1-content', + url_args={'q': f'sha1_git:{b["target"]}'}, + 
request=request) + + for _, b in snapshot_data['branches'].items(): + if b['target_type'] == 'alias': + target = resolve_branch_alias(snapshot_data, b) + b['target_url'] = target['target_url'] + + assert actual_snapshot == snapshot_data + + +@given(origin()) +def test_enrich_origin(api_request_factory, archive_data, origin): + url = reverse('api-1-origin', url_args={'origin_url': origin['url']}) + request = api_request_factory.get(url) + + origin_data = {'url': origin['url']} + actual_origin = utils.enrich_origin(origin_data, request) + + origin_data['origin_visits_url'] = reverse( + 'api-1-origin-visits', + url_args={'origin_url': origin['url']}, + request=request) + + assert actual_origin == origin_data + + +@given(origin()) +def test_enrich_origin_visit(api_request_factory, archive_data, origin): + + origin_visit = random.choice(get_origin_visits(origin)) + + url = reverse('api-1-origin-visit', + url_args={'origin_url': origin['url'], + 'visit_id': origin_visit['visit']}) + request = api_request_factory.get(url) + + actual_origin_visit = utils.enrich_origin_visit( + origin_visit, with_origin_link=True, + with_origin_visit_link=True, request=request) + + origin_visit['origin_url'] = reverse( + 'api-1-origin', + url_args={'origin_url': origin['url']}, + request=request) + + origin_visit['origin_visit_url'] = reverse( + 'api-1-origin-visit', + url_args={'origin_url': origin['url'], + 'visit_id': origin_visit['visit']}, + request=request) + + origin_visit['snapshot_url'] = reverse( + 'api-1-snapshot', + url_args={'snapshot_id': origin_visit['snapshot']}, + request=request) - mock_django_reverse.assert_has_calls([ - mocker.call('api-1-revision', url_args={'sha1_git': 'rev-id'}), - mocker.call('api-1-revision-log', url_args={'sha1_git': 'rev-id'}), - mocker.call('api-1-revision', url_args={'sha1_git': '123'}), - mocker.call('api-1-revision', url_args={'sha1_git': '456'}), - mocker.call('api-1-revision-raw-message', - url_args={'sha1_git': 'rev-id'}) - ]) + assert 
actual_origin_visit == origin_visit diff --git a/swh/web/tests/api/views/test_content.py b/swh/web/tests/api/views/test_content.py index 0c490ee8..95ddec97 100644 --- a/swh/web/tests/api/views/test_content.py +++ b/swh/web/tests/api/views/test_content.py @@ -1,381 +1,387 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from hypothesis import given from swh.web.common.utils import reverse from swh.web.tests.data import random_content from swh.web.tests.strategies import content, contents_with_ctags from swh.web.tests.conftest import ctags_json_missing, fossology_missing @given(content()) def test_api_content_filetype(api_client, indexer_data, content): indexer_data.content_add_mimetype(content['sha1']) url = reverse('api-1-content-filetype', url_args={'q': 'sha1_git:%s' % content['sha1_git']}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' content_url = reverse('api-1-content', - url_args={'q': 'sha1:%s' % content['sha1']}) + url_args={'q': 'sha1:%s' % content['sha1']}, + request=rv.wsgi_request) expected_data = indexer_data.content_get_mimetype(content['sha1']) expected_data['content_url'] = content_url assert rv.data == expected_data def test_api_content_filetype_sha_not_found(api_client): unknown_content_ = random_content() url = reverse('api-1-content-filetype', url_args={'q': 'sha1:%s' % unknown_content_['sha1']}) rv = api_client.get(url) assert rv.status_code == 404, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == { 'exception': 'NotFoundExc', 'reason': 'No filetype information found for content ' 'sha1:%s.' 
% unknown_content_['sha1'] } @pytest.mark.skip # Language indexer is disabled @given(content()) def test_api_content_language(api_client, indexer_data, content): indexer_data.content_add_language(content['sha1']) url = reverse('api-1-content-language', url_args={'q': 'sha1_git:%s' % content['sha1_git']}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' content_url = reverse('api-1-content', - url_args={'q': 'sha1:%s' % content['sha1']}) + url_args={'q': 'sha1:%s' % content['sha1']}, + request=rv.wsgi_request) expected_data = indexer_data.content_get_language(content['sha1']) expected_data['content_url'] = content_url assert rv.data == expected_data def test_api_content_language_sha_not_found(api_client): unknown_content_ = random_content() url = reverse('api-1-content-language', url_args={'q': 'sha1:%s' % unknown_content_['sha1']}) rv = api_client.get(url) assert rv.status_code == 404, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == { 'exception': 'NotFoundExc', 'reason': 'No language information found for content ' 'sha1:%s.' 
% unknown_content_['sha1'] } @pytest.mark.skip # Language indexer is disabled @pytest.mark.skipif(ctags_json_missing, reason="requires ctags with json output support") @given(contents_with_ctags()) def test_api_content_symbol(api_client, indexer_data, contents_with_ctags): expected_data = {} for content_sha1 in contents_with_ctags['sha1s']: indexer_data.content_add_ctags(content_sha1) for ctag in indexer_data.content_get_ctags(content_sha1): if ctag['name'] == contents_with_ctags['symbol_name']: expected_data[content_sha1] = ctag break url = reverse('api-1-content-symbol', url_args={'q': contents_with_ctags['symbol_name']}, query_params={'per_page': 100}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' for entry in rv.data: content_sha1 = entry['sha1'] expected_entry = expected_data[content_sha1] for key, view_name in (('content_url', 'api-1-content'), ('data_url', 'api-1-content-raw'), ('license_url', 'api-1-content-license'), ('language_url', 'api-1-content-language'), ('filetype_url', 'api-1-content-filetype')): expected_entry[key] = reverse( - view_name, url_args={'q': 'sha1:%s' % content_sha1}) + view_name, url_args={'q': 'sha1:%s' % content_sha1}, + request=rv.wsgi_request) expected_entry['sha1'] = content_sha1 del expected_entry['id'] assert entry == expected_entry assert 'Link' not in rv url = reverse('api-1-content-symbol', url_args={'q': contents_with_ctags['symbol_name']}, query_params={'per_page': 2}) rv = api_client.get(url) - next_url = rv.wsgi_request.build_absolute_uri( - reverse('api-1-content-symbol', - url_args={'q': contents_with_ctags['symbol_name']}, - query_params={'last_sha1': rv.data[1]['sha1'], - 'per_page': 2})) + next_url = reverse('api-1-content-symbol', + url_args={'q': contents_with_ctags['symbol_name']}, + query_params={'last_sha1': rv.data[1]['sha1'], + 'per_page': 2}, + request=rv.wsgi_request), assert rv['Link'] == '<%s>; rel="next"' % next_url def 
test_api_content_symbol_not_found(api_client): url = reverse('api-1-content-symbol', url_args={'q': 'bar'}) rv = api_client.get(url) assert rv.status_code == 404, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == { 'exception': 'NotFoundExc', 'reason': 'No indexed raw content match expression \'bar\'.' } assert 'Link' not in rv @pytest.mark.skipif(ctags_json_missing, reason="requires ctags with json output support") @given(content()) def test_api_content_ctags(api_client, indexer_data, content): indexer_data.content_add_ctags(content['sha1']) url = reverse('api-1-content-ctags', url_args={'q': 'sha1_git:%s' % content['sha1_git']}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' content_url = reverse('api-1-content', - url_args={'q': 'sha1:%s' % content['sha1']}) + url_args={'q': 'sha1:%s' % content['sha1']}, + request=rv.wsgi_request) expected_data = list(indexer_data.content_get_ctags(content['sha1'])) for e in expected_data: e['content_url'] = content_url assert rv.data == expected_data @pytest.mark.skipif(fossology_missing, reason="requires fossology-nomossa installed") @given(content()) def test_api_content_license(api_client, indexer_data, content): indexer_data.content_add_license(content['sha1']) url = reverse('api-1-content-license', url_args={'q': 'sha1_git:%s' % content['sha1_git']}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' content_url = reverse('api-1-content', - url_args={'q': 'sha1:%s' % content['sha1']}) + url_args={'q': 'sha1:%s' % content['sha1']}, + request=rv.wsgi_request) expected_data = indexer_data.content_get_license(content['sha1']) expected_data['content_url'] = content_url assert rv.data == expected_data def test_api_content_license_sha_not_found(api_client): unknown_content_ = random_content() url = reverse('api-1-content-license', url_args={'q': 'sha1:%s' % unknown_content_['sha1']}) 
rv = api_client.get(url) assert rv.status_code == 404, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == { 'exception': 'NotFoundExc', 'reason': 'No license information found for content ' 'sha1:%s.' % unknown_content_['sha1'] } @given(content()) def test_api_content_metadata(api_client, archive_data, content): url = reverse('api-1-content', {'q': 'sha1:%s' % content['sha1']}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' expected_data = archive_data.content_get_metadata(content['sha1']) for key, view_name in (('data_url', 'api-1-content-raw'), ('license_url', 'api-1-content-license'), ('language_url', 'api-1-content-language'), ('filetype_url', 'api-1-content-filetype')): expected_data[key] = reverse( - view_name, url_args={'q': 'sha1:%s' % content['sha1']}) + view_name, url_args={'q': 'sha1:%s' % content['sha1']}, + request=rv.wsgi_request) assert rv.data == expected_data def test_api_content_not_found_as_json(api_client): unknown_content_ = random_content() url = reverse('api-1-content', url_args={'q': 'sha1:%s' % unknown_content_['sha1']}) rv = api_client.get(url) assert rv.status_code == 404, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == { 'exception': 'NotFoundExc', 'reason': 'Content with sha1 checksum equals to %s not found!' % unknown_content_['sha1'] } def test_api_content_not_found_as_yaml(api_client): unknown_content_ = random_content() url = reverse('api-1-content', url_args={'q': 'sha256:%s' % unknown_content_['sha256']}) rv = api_client.get(url, HTTP_ACCEPT='application/yaml') assert rv.status_code == 404, rv.data assert 'application/yaml' in rv['Content-Type'] assert rv.data == { 'exception': 'NotFoundExc', 'reason': 'Content with sha256 checksum equals to %s not found!' 
% unknown_content_['sha256'] } def test_api_content_raw_ko_not_found(api_client): unknown_content_ = random_content() url = reverse('api-1-content-raw', url_args={'q': 'sha1:%s' % unknown_content_['sha1']}) rv = api_client.get(url) assert rv.status_code == 404, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == { 'exception': 'NotFoundExc', 'reason': 'Content with sha1 checksum equals to %s not found!' % unknown_content_['sha1'] } @given(content()) def test_api_content_raw_text(api_client, archive_data, content): url = reverse('api-1-content-raw', url_args={'q': 'sha1:%s' % content['sha1']}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/octet-stream' assert rv['Content-disposition'] == \ 'attachment; filename=content_sha1_%s_raw' % content['sha1'] assert rv['Content-Type'] == 'application/octet-stream' expected_data = archive_data.content_get(content['sha1']) assert rv.content == expected_data['data'] @given(content()) def test_api_content_raw_text_with_filename(api_client, archive_data, content): url = reverse('api-1-content-raw', url_args={'q': 'sha1:%s' % content['sha1']}, query_params={'filename': 'filename.txt'}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/octet-stream' assert rv['Content-disposition'] == \ 'attachment; filename=filename.txt' assert rv['Content-Type'] == 'application/octet-stream' expected_data = archive_data.content_get(content['sha1']) assert rv.content == expected_data['data'] @given(content()) def test_api_check_content_known(api_client, content): url = reverse('api-1-content-known', url_args={'q': content['sha1']}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == { 'search_res': [ { 'found': True, 'sha1': content['sha1'] } ], 'search_stats': {'nbfiles': 1, 'pct': 100.0} } @given(content()) def 
test_api_check_content_known_as_yaml(api_client, content): url = reverse('api-1-content-known', url_args={'q': content['sha1']}) rv = api_client.get(url, HTTP_ACCEPT='application/yaml') assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/yaml' assert rv.data == { 'search_res': [ { 'found': True, 'sha1': content['sha1'] } ], 'search_stats': {'nbfiles': 1, 'pct': 100.0} } @given(content()) def test_api_check_content_known_post_as_yaml(api_client, content): url = reverse('api-1-content-known') rv = api_client.post(url, data={'q': content['sha1']}, HTTP_ACCEPT='application/yaml') assert rv.status_code == 200, rv.data assert 'application/yaml' in rv['Content-Type'] assert rv.data == { 'search_res': [ { 'found': True, 'sha1': content['sha1'] } ], 'search_stats': {'nbfiles': 1, 'pct': 100.0} } def test_api_check_content_known_not_found(api_client): unknown_content_ = random_content() url = reverse('api-1-content-known', url_args={'q': unknown_content_['sha1']}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == { 'search_res': [ { 'found': False, 'sha1': unknown_content_['sha1'] } ], 'search_stats': {'nbfiles': 1, 'pct': 0.0} } @given(content()) def test_api_content_uppercase(api_client, content): url = reverse('api-1-content-uppercase-checksum', url_args={'q': content['sha1'].upper()}) rv = api_client.get(url) assert rv.status_code == 302, rv.data redirect_url = reverse('api-1-content', url_args={'q': content['sha1']}) assert rv['location'] == redirect_url diff --git a/swh/web/tests/api/views/test_directory.py b/swh/web/tests/api/views/test_directory.py index 03ff9259..0f1d184e 100644 --- a/swh/web/tests/api/views/test_directory.py +++ b/swh/web/tests/api/views/test_directory.py @@ -1,103 +1,90 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public 
License version 3, or any later version # See top-level LICENSE file for more information import random from hypothesis import given +from swh.web.api.utils import enrich_directory from swh.web.common.utils import reverse from swh.web.tests.data import random_sha1 from swh.web.tests.strategies import directory @given(directory()) def test_api_directory(api_client, archive_data, directory): url = reverse('api-1-directory', url_args={'sha1_git': directory}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' - expected_data = list(map(_enrich_dir_data, - archive_data.directory_ls(directory))) + dir_content = list(archive_data.directory_ls(directory)) + expected_data = list(map(enrich_directory, + dir_content, + [rv.wsgi_request] * len(dir_content))) assert rv.data == expected_data def test_api_directory_not_found(api_client): unknown_directory_ = random_sha1() url = reverse('api-1-directory', url_args={'sha1_git': unknown_directory_}) rv = api_client.get(url) assert rv.status_code == 404, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == { 'exception': 'NotFoundExc', 'reason': 'Directory with sha1_git %s not found' % unknown_directory_ } @given(directory()) def test_api_directory_with_path_found(api_client, archive_data, directory): directory_content = archive_data.directory_ls(directory) path = random.choice(directory_content) url = reverse('api-1-directory', url_args={'sha1_git': directory, 'path': path['name']}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' - assert rv.data == _enrich_dir_data(path) + assert rv.data == enrich_directory(path, rv.wsgi_request) @given(directory()) def test_api_directory_with_path_not_found(api_client, directory): path = 'some/path/to/nonexistent/dir/' url = reverse('api-1-directory', url_args={'sha1_git': directory, 'path': path}) rv = api_client.get(url) assert rv.status_code == 404, 
rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == { 'exception': 'NotFoundExc', 'reason': ('Directory entry with path %s from %s not found' % (path, directory)) } @given(directory()) def test_api_directory_uppercase(api_client, directory): url = reverse('api-1-directory-uppercase-checksum', url_args={'sha1_git': directory.upper()}) resp = api_client.get(url) assert resp.status_code == 302 redirect_url = reverse('api-1-directory', url_args={'sha1_git': directory}) assert resp['location'] == redirect_url - - -def _enrich_dir_data(dir_data): - if dir_data['type'] == 'file': - dir_data['target_url'] = reverse( - 'api-1-content', - url_args={'q': 'sha1_git:%s' % dir_data['target']}) - elif dir_data['type'] == 'dir': - dir_data['target_url'] = reverse( - 'api-1-directory', - url_args={'sha1_git': dir_data['target']}) - elif dir_data['type'] == 'rev': - dir_data['target_url'] = reverse( - 'api-1-revision', - url_args={'sha1_git': dir_data['target']}) - return dir_data diff --git a/swh/web/tests/api/views/test_identifiers.py b/swh/web/tests/api/views/test_identifiers.py index bb7e791a..7828ad3c 100644 --- a/swh/web/tests/api/views/test_identifiers.py +++ b/swh/web/tests/api/views/test_identifiers.py @@ -1,89 +1,90 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from hypothesis import given from swh.model.identifiers import ( CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT ) from swh.web.common.utils import reverse from swh.web.tests.strategies import ( content, directory, origin, release, revision, snapshot, unknown_content, unknown_directory, unknown_release, unknown_revision, unknown_snapshot ) @given(origin(), content(), directory(), release(), revision(), snapshot()) def test_swh_id_resolve_success(api_client, origin, content, directory, 
release, revision, snapshot): for obj_type_short, obj_type, obj_id in ( ('cnt', CONTENT, content['sha1_git']), ('dir', DIRECTORY, directory), ('rel', RELEASE, release), ('rev', REVISION, revision), ('snp', SNAPSHOT, snapshot)): swh_id = 'swh:1:%s:%s;origin=%s' % (obj_type_short, obj_id, origin['url']) url = reverse('api-1-resolve-swh-pid', url_args={'swh_id': swh_id}) resp = api_client.get(url) if obj_type == CONTENT: url_args = {'query_string': 'sha1_git:%s' % obj_id} elif obj_type == SNAPSHOT: url_args = {'snapshot_id': obj_id} else: url_args = {'sha1_git': obj_id} browse_rev_url = reverse('browse-%s' % obj_type, url_args=url_args, - query_params={'origin': origin['url']}) + query_params={'origin': origin['url']}, + request=resp.wsgi_request) expected_result = { 'browse_url': browse_rev_url, 'metadata': {'origin': origin['url']}, 'namespace': 'swh', 'object_id': obj_id, 'object_type': obj_type, 'scheme_version': 1 } assert resp.status_code == 200, resp.data assert resp.data == expected_result def test_swh_id_resolve_invalid(api_client): rev_id_invalid = '96db9023b8_foo_50d6c108e9a3' swh_id = 'swh:1:rev:%s' % rev_id_invalid url = reverse('api-1-resolve-swh-pid', url_args={'swh_id': swh_id}) resp = api_client.get(url) assert resp.status_code == 400, resp.data @given(unknown_content(), unknown_directory(), unknown_release(), unknown_revision(), unknown_snapshot()) def test_swh_id_resolve_not_found(api_client, unknown_content, unknown_directory, unknown_release, unknown_revision, unknown_snapshot): for obj_type_short, obj_id in (('cnt', unknown_content['sha1_git']), ('dir', unknown_directory), ('rel', unknown_release), ('rev', unknown_revision), ('snp', unknown_snapshot)): swh_id = 'swh:1:%s:%s' % (obj_type_short, obj_id) url = reverse('api-1-resolve-swh-pid', url_args={'swh_id': swh_id}) resp = api_client.get(url) assert resp.status_code == 404, resp.data diff --git a/swh/web/tests/api/views/test_origin.py b/swh/web/tests/api/views/test_origin.py index 
c5fb16b4..57f823f1 100644 --- a/swh/web/tests/api/views/test_origin.py +++ b/swh/web/tests/api/views/test_origin.py @@ -1,670 +1,641 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from hypothesis import given import pytest from requests.utils import parse_header_links from swh.storage.exc import StorageDBError, StorageAPIError +from swh.web.api.utils import enrich_origin_visit, enrich_origin from swh.web.common.exc import BadInputExc from swh.web.common.utils import reverse from swh.web.common.origin_visits import get_origin_visits from swh.web.tests.strategies import ( origin, new_origin, visit_dates, new_snapshots ) def _scroll_results(api_client, url): """Iterates through pages of results, and returns them all.""" results = [] while True: rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' results.extend(rv.data) if 'Link' in rv: for link in parse_header_links(rv['Link']): if link['rel'] == 'next': # Found link to next page of results url = link['url'] break else: # No link with 'rel=next' break else: # No Link header break return results def test_api_lookup_origin_visits_raise_error(api_client, mocker): mock_get_origin_visits = mocker.patch( 'swh.web.api.views.origin.get_origin_visits') err_msg = 'voluntary error to check the bad request middleware.' 
mock_get_origin_visits.side_effect = BadInputExc(err_msg) url = reverse('api-1-origin-visits', url_args={'origin_url': 'http://foo'}) rv = api_client.get(url) assert rv.status_code == 400, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == { 'exception': 'BadInputExc', 'reason': err_msg } def test_api_lookup_origin_visits_raise_swh_storage_error_db(api_client, mocker): mock_get_origin_visits = mocker.patch( 'swh.web.api.views.origin.get_origin_visits') err_msg = 'Storage exploded! Will be back online shortly!' mock_get_origin_visits.side_effect = StorageDBError(err_msg) url = reverse('api-1-origin-visits', url_args={'origin_url': 'http://foo'}) rv = api_client.get(url) assert rv.status_code == 503, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == { 'exception': 'StorageDBError', 'reason': 'An unexpected error occurred in the backend: %s' % err_msg } def test_api_lookup_origin_visits_raise_swh_storage_error_api(api_client, mocker): mock_get_origin_visits = mocker.patch( 'swh.web.api.views.origin.get_origin_visits') err_msg = 'Storage API dropped dead! Will resurrect asap!' 
mock_get_origin_visits.side_effect = StorageAPIError(err_msg) url = reverse( 'api-1-origin-visits', url_args={'origin_url': 'http://foo'}) rv = api_client.get(url) assert rv.status_code == 503, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == { 'exception': 'StorageAPIError', 'reason': 'An unexpected error occurred in the api backend: %s' % err_msg } @given(new_origin(), visit_dates(3), new_snapshots(3)) def test_api_lookup_origin_visits(api_client, archive_data, new_origin, visit_dates, new_snapshots): archive_data.origin_add_one(new_origin) for i, visit_date in enumerate(visit_dates): origin_visit = archive_data.origin_visit_add( new_origin['url'], visit_date, type='git') archive_data.snapshot_add([new_snapshots[i]]) archive_data.origin_visit_update( new_origin['url'], origin_visit['visit'], snapshot=new_snapshots[i]['id']) all_visits = list(reversed(get_origin_visits(new_origin))) for last_visit, expected_visits in ( (None, all_visits[:2]), - (all_visits[1]['visit'], all_visits[2:4])): + (all_visits[1]['visit'], all_visits[2:])): url = reverse('api-1-origin-visits', url_args={'origin_url': new_origin['url']}, query_params={'per_page': 2, 'last_visit': last_visit}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' - for expected_visit in expected_visits: - origin_visit_url = reverse( - 'api-1-origin-visit', - url_args={'origin_url': new_origin['url'], - 'visit_id': expected_visit['visit']}) - snapshot_url = reverse( - 'api-1-snapshot', - url_args={'snapshot_id': expected_visit['snapshot']}) - expected_visit['origin'] = new_origin['url'] - expected_visit['origin_visit_url'] = origin_visit_url - expected_visit['snapshot_url'] = snapshot_url + for i in range(len(expected_visits)): + expected_visits[i] = enrich_origin_visit( + expected_visits[i], with_origin_link=False, + with_origin_visit_link=True, request=rv.wsgi_request) assert rv.data == expected_visits @given(new_origin(), 
visit_dates(3), new_snapshots(3)) def test_api_lookup_origin_visits_by_id(api_client, archive_data, new_origin, visit_dates, new_snapshots): archive_data.origin_add_one(new_origin) for i, visit_date in enumerate(visit_dates): origin_visit = archive_data.origin_visit_add( new_origin['url'], visit_date, type='git') archive_data.snapshot_add([new_snapshots[i]]) archive_data.origin_visit_update( new_origin['url'], origin_visit['visit'], snapshot=new_snapshots[i]['id']) all_visits = list(reversed(get_origin_visits(new_origin))) for last_visit, expected_visits in ( (None, all_visits[:2]), (all_visits[1]['visit'], all_visits[2:4])): url = reverse('api-1-origin-visits', url_args={'origin_url': new_origin['url']}, query_params={'per_page': 2, 'last_visit': last_visit}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' - for expected_visit in expected_visits: - origin_visit_url = reverse( - 'api-1-origin-visit', - url_args={'origin_url': new_origin['url'], - 'visit_id': expected_visit['visit']}) - snapshot_url = reverse( - 'api-1-snapshot', - url_args={'snapshot_id': expected_visit['snapshot']}) - expected_visit['origin'] = new_origin['url'] - expected_visit['origin_visit_url'] = origin_visit_url - expected_visit['snapshot_url'] = snapshot_url + for i in range(len(expected_visits)): + expected_visits[i] = enrich_origin_visit( + expected_visits[i], with_origin_link=False, + with_origin_visit_link=True, request=rv.wsgi_request) assert rv.data == expected_visits @given(new_origin(), visit_dates(3), new_snapshots(3)) def test_api_lookup_origin_visit(api_client, archive_data, new_origin, visit_dates, new_snapshots): archive_data.origin_add_one(new_origin) for i, visit_date in enumerate(visit_dates): origin_visit = archive_data.origin_visit_add( new_origin['url'], visit_date, type='git') visit_id = origin_visit['visit'] archive_data.snapshot_add([new_snapshots[i]]) archive_data.origin_visit_update( new_origin['url'], 
origin_visit['visit'], snapshot=new_snapshots[i]['id']) url = reverse('api-1-origin-visit', url_args={'origin_url': new_origin['url'], 'visit_id': visit_id}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' expected_visit = archive_data.origin_visit_get_by( new_origin['url'], visit_id) - origin_url = reverse('api-1-origin', - url_args={'origin_url': new_origin['url']}) - snapshot_url = reverse( - 'api-1-snapshot', - url_args={'snapshot_id': expected_visit['snapshot']}) - - expected_visit['origin'] = new_origin['url'] - expected_visit['origin_url'] = origin_url - expected_visit['snapshot_url'] = snapshot_url + expected_visit = enrich_origin_visit( + expected_visit, with_origin_link=True, + with_origin_visit_link=False, request=rv.wsgi_request) assert rv.data == expected_visit @given(new_origin()) def test_api_lookup_origin_visit_latest_no_visit(api_client, archive_data, new_origin): archive_data.origin_add_one(new_origin) url = reverse('api-1-origin-visit-latest', url_args={'origin_url': new_origin['url']}) rv = api_client.get(url) assert rv.status_code == 404, rv.data assert rv.data == { 'exception': 'NotFoundExc', 'reason': 'No visit for origin %s found' % new_origin['url'] } @given(new_origin(), visit_dates(2), new_snapshots(1)) def test_api_lookup_origin_visit_latest(api_client, archive_data, new_origin, visit_dates, new_snapshots): archive_data.origin_add_one(new_origin) visit_dates.sort() visit_ids = [] for i, visit_date in enumerate(visit_dates): origin_visit = archive_data.origin_visit_add( new_origin['url'], visit_date, type='git') visit_ids.append(origin_visit['visit']) archive_data.snapshot_add([new_snapshots[0]]) archive_data.origin_visit_update( new_origin['url'], visit_ids[0], snapshot=new_snapshots[0]['id']) url = reverse('api-1-origin-visit-latest', url_args={'origin_url': new_origin['url']}) rv = api_client.get(url) + assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 
'application/json' expected_visit = archive_data.origin_visit_get_by( new_origin['url'], visit_ids[1]) - origin_url = reverse('api-1-origin', - url_args={'origin_url': new_origin['url']}) - - expected_visit['origin'] = new_origin['url'] - expected_visit['origin_url'] = origin_url - expected_visit['snapshot_url'] = None + expected_visit = enrich_origin_visit( + expected_visit, with_origin_link=True, + with_origin_visit_link=False, request=rv.wsgi_request) assert rv.data == expected_visit @given(new_origin(), visit_dates(2), new_snapshots(1)) def test_api_lookup_origin_visit_latest_with_snapshot(api_client, archive_data, new_origin, visit_dates, new_snapshots): archive_data.origin_add_one(new_origin) visit_dates.sort() visit_ids = [] for i, visit_date in enumerate(visit_dates): origin_visit = archive_data.origin_visit_add( new_origin['url'], visit_date, type='git') visit_ids.append(origin_visit['visit']) archive_data.snapshot_add([new_snapshots[0]]) archive_data.origin_visit_update( new_origin['url'], visit_ids[0], snapshot=new_snapshots[0]['id']) url = reverse('api-1-origin-visit-latest', - url_args={'origin_url': new_origin['url']}) - url += '?require_snapshot=true' + url_args={'origin_url': new_origin['url']}, + query_params={'require_snapshot': True}) rv = api_client.get(url) + assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' expected_visit = archive_data.origin_visit_get_by( new_origin['url'], visit_ids[0]) - origin_url = reverse('api-1-origin', - url_args={'origin_url': new_origin['url']}) - snapshot_url = reverse( - 'api-1-snapshot', - url_args={'snapshot_id': expected_visit['snapshot']}) - - expected_visit['origin'] = new_origin['url'] - expected_visit['origin_url'] = origin_url - expected_visit['snapshot_url'] = snapshot_url + expected_visit = enrich_origin_visit( + expected_visit, with_origin_link=True, + with_origin_visit_link=False, request=rv.wsgi_request) assert rv.data == expected_visit @given(origin()) def 
test_api_lookup_origin_visit_not_found(api_client, origin): all_visits = list(reversed(get_origin_visits(origin))) max_visit_id = max([v['visit'] for v in all_visits]) url = reverse('api-1-origin-visit', url_args={'origin_url': origin['url'], 'visit_id': max_visit_id + 1}) rv = api_client.get(url) assert rv.status_code == 404, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == { 'exception': 'NotFoundExc', 'reason': 'Origin %s or its visit with id %s not found!' % (origin['url'], max_visit_id+1) } def test_api_origins(api_client, archive_data): origins = list(archive_data.origin_get_range(0, 10000)) origin_urls = {origin['url'] for origin in origins} # Get only one url = reverse('api-1-origins', query_params={'origin_count': 1}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' assert len(rv.data) == 1 assert {origin['url'] for origin in rv.data} <= origin_urls # Get all url = reverse('api-1-origins', query_params={'origin_count': len(origins)}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' assert len(rv.data) == len(origins) assert {origin['url'] for origin in rv.data} == origin_urls # Get "all + 10" url = reverse('api-1-origins', query_params={'origin_count': len(origins)+10}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' assert len(rv.data) == len(origins) assert {origin['url'] for origin in rv.data} == origin_urls @pytest.mark.parametrize('origin_count', [1, 2, 10, 100]) def test_api_origins_scroll(api_client, archive_data, origin_count): origins = list(archive_data.origin_get_range(0, 10000)) origin_urls = {origin['url'] for origin in origins} url = reverse('api-1-origins', query_params={'origin_count': origin_count}) results = _scroll_results(api_client, url) assert len(results) == len(origins) assert {origin['url'] for origin in results} == 
origin_urls @given(origin()) def test_api_origin_by_url(api_client, archive_data, origin): url = reverse('api-1-origin', url_args={'origin_url': origin['url']}) rv = api_client.get(url) expected_origin = archive_data.origin_get(origin) - origin_visits_url = reverse('api-1-origin-visits', - url_args={'origin_url': origin['url']}) - - expected_origin['origin_visits_url'] = origin_visits_url + expected_origin = enrich_origin(expected_origin, rv.wsgi_request) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == expected_origin @given(new_origin()) def test_api_origin_not_found(api_client, new_origin): url = reverse('api-1-origin', url_args={'origin_url': new_origin['url']}) rv = api_client.get(url) assert rv.status_code == 404, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == { 'exception': 'NotFoundExc', 'reason': 'Origin with url %s not found!' % new_origin['url'] } def test_api_origin_search(api_client): expected_origins = { 'https://github.com/wcoder/highlightjs-line-numbers.js', 'https://github.com/memononen/libtess2', } # Search for 'github.com', get only one url = reverse('api-1-origin-search', url_args={'url_pattern': 'github.com'}, query_params={'limit': 1}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' assert len(rv.data) == 1 assert {origin['url'] for origin in rv.data} <= expected_origins # Search for 'github.com', get all url = reverse('api-1-origin-search', url_args={'url_pattern': 'github.com'}, query_params={'limit': 2}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' assert {origin['url'] for origin in rv.data} == expected_origins # Search for 'github.com', get more than available url = reverse('api-1-origin-search', url_args={'url_pattern': 'github.com'}, query_params={'limit': 10}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert 
rv['Content-Type'] == 'application/json' assert {origin['url'] for origin in rv.data} == expected_origins def test_api_origin_search_words(api_client): expected_origins = { 'https://github.com/wcoder/highlightjs-line-numbers.js', 'https://github.com/memononen/libtess2', } url = reverse('api-1-origin-search', url_args={'url_pattern': 'github com'}, query_params={'limit': 2}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' assert {origin['url'] for origin in rv.data} == expected_origins url = reverse('api-1-origin-search', url_args={'url_pattern': 'com github'}, query_params={'limit': 2}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' assert {origin['url'] for origin in rv.data} == expected_origins url = reverse('api-1-origin-search', url_args={'url_pattern': 'memononen libtess2'}, query_params={'limit': 2}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' assert len(rv.data) == 1 assert {origin['url'] for origin in rv.data} \ == {'https://github.com/memononen/libtess2'} url = reverse('api-1-origin-search', url_args={'url_pattern': 'libtess2 memononen'}, query_params={'limit': 2}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' assert len(rv.data) == 1 assert {origin['url'] for origin in rv.data} \ == {'https://github.com/memononen/libtess2'} @pytest.mark.parametrize('limit', [1, 2, 3, 10]) def test_api_origin_search_scroll(api_client, archive_data, limit): expected_origins = { 'https://github.com/wcoder/highlightjs-line-numbers.js', 'https://github.com/memononen/libtess2', } url = reverse('api-1-origin-search', url_args={'url_pattern': 'github.com'}, query_params={'limit': limit}) results = _scroll_results(api_client, url) assert {origin['url'] for origin in results} == expected_origins def 
test_api_origin_search_limit(api_client, archive_data): archive_data.origin_add([ {'url': 'http://foobar/{}'.format(i)} for i in range(2000) ]) url = reverse('api-1-origin-search', url_args={'url_pattern': 'foobar'}, query_params={'limit': 1050}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' assert len(rv.data) == 1000 @given(origin()) def test_api_origin_metadata_search(api_client, mocker, origin): mock_idx_storage = mocker.patch('swh.web.common.service.idx_storage') oimsft = mock_idx_storage.origin_intrinsic_metadata_search_fulltext oimsft.side_effect = lambda conjunction, limit: [{ 'from_revision': ( b'p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed ' b'\xf2U\xfa\x05B8'), 'metadata': {'author': 'Jane Doe'}, 'id': origin['url'], 'tool': { 'configuration': { 'context': ['NpmMapping', 'CodemetaMapping'], 'type': 'local' }, 'id': 3, 'name': 'swh-metadata-detector', 'version': '0.0.1' } }] url = reverse('api-1-origin-metadata-search', query_params={'fulltext': 'Jane Doe'}) rv = api_client.get(url) assert rv.status_code == 200, rv.content assert rv['Content-Type'] == 'application/json' expected_data = [{ 'url': origin['url'], 'metadata': { 'metadata': {'author': 'Jane Doe'}, 'from_revision': ( '7026b7c1a2af56521e951c01ed20f255fa054238'), 'tool': { 'configuration': { 'context': ['NpmMapping', 'CodemetaMapping'], 'type': 'local' }, 'id': 3, 'name': 'swh-metadata-detector', 'version': '0.0.1', } } }] assert rv.data == expected_data oimsft.assert_called_with(conjunction=['Jane Doe'], limit=70) @given(origin()) def test_api_origin_metadata_search_limit(api_client, mocker, origin): mock_idx_storage = mocker.patch('swh.web.common.service.idx_storage') oimsft = mock_idx_storage.origin_intrinsic_metadata_search_fulltext oimsft.side_effect = lambda conjunction, limit: [{ 'from_revision': ( b'p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed ' b'\xf2U\xfa\x05B8'), 'metadata': {'author': 'Jane Doe'}, 'id': origin['url'], 'tool': { 
'configuration': { 'context': ['NpmMapping', 'CodemetaMapping'], 'type': 'local' }, 'id': 3, 'name': 'swh-metadata-detector', 'version': '0.0.1' } }] url = reverse('api-1-origin-metadata-search', query_params={'fulltext': 'Jane Doe'}) rv = api_client.get(url) assert rv.status_code == 200, rv.content assert rv['Content-Type'] == 'application/json' assert len(rv.data) == 1 oimsft.assert_called_with(conjunction=['Jane Doe'], limit=70) url = reverse('api-1-origin-metadata-search', query_params={'fulltext': 'Jane Doe', 'limit': 10}) rv = api_client.get(url) assert rv.status_code == 200, rv.content assert rv['Content-Type'] == 'application/json' assert len(rv.data) == 1 oimsft.assert_called_with(conjunction=['Jane Doe'], limit=10) url = reverse('api-1-origin-metadata-search', query_params={'fulltext': 'Jane Doe', 'limit': 987}) rv = api_client.get(url) assert rv.status_code == 200, rv.content assert rv['Content-Type'] == 'application/json' assert len(rv.data) == 1 oimsft.assert_called_with(conjunction=['Jane Doe'], limit=100) @given(origin()) def test_api_origin_intrinsic_metadata(api_client, mocker, origin): mock_idx_storage = mocker.patch('swh.web.common.service.idx_storage') oimg = mock_idx_storage.origin_intrinsic_metadata_get oimg.side_effect = lambda origin_urls: [{ 'from_revision': ( b'p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed ' b'\xf2U\xfa\x05B8'), 'metadata': {'author': 'Jane Doe'}, 'id': origin['url'], 'tool': { 'configuration': { 'context': ['NpmMapping', 'CodemetaMapping'], 'type': 'local' }, 'id': 3, 'name': 'swh-metadata-detector', 'version': '0.0.1' } }] url = reverse('api-origin-intrinsic-metadata', url_args={'origin_url': origin['url']}) rv = api_client.get(url) oimg.assert_called_once_with([origin['url']]) assert rv.status_code == 200, rv.content assert rv['Content-Type'] == 'application/json' expected_data = {'author': 'Jane Doe'} assert rv.data == expected_data def test_api_origin_metadata_search_invalid(api_client, mocker): mock_idx_storage = 
mocker.patch('swh.web.common.service.idx_storage') url = reverse('api-1-origin-metadata-search') rv = api_client.get(url) assert rv.status_code == 400, rv.content mock_idx_storage.assert_not_called() diff --git a/swh/web/tests/api/views/test_release.py b/swh/web/tests/api/views/test_release.py index 0e7dcc08..7a6015d3 100644 --- a/swh/web/tests/api/views/test_release.py +++ b/swh/web/tests/api/views/test_release.py @@ -1,114 +1,116 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime from hypothesis import given from swh.model.hashutil import hash_to_bytes from swh.web.common.utils import reverse from swh.web.tests.data import random_sha1 from swh.web.tests.strategies import ( release, sha1, content, directory ) @given(release()) def test_api_release(api_client, archive_data, release): url = reverse('api-1-release', url_args={'sha1_git': release}) rv = api_client.get(url) expected_release = archive_data.release_get(release) target_revision = expected_release['target'] target_url = reverse('api-1-revision', - url_args={'sha1_git': target_revision}) + url_args={'sha1_git': target_revision}, + request=rv.wsgi_request) expected_release['target_url'] = target_url assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == expected_release @given(sha1(), sha1(), sha1(), content(), directory(), release()) def test_api_release_target_type_not_a_revision(api_client, archive_data, new_rel1, new_rel2, new_rel3, content, directory, release): for new_rel_id, target_type, target in ( (new_rel1, 'content', content), (new_rel2, 'directory', directory), (new_rel3, 'release', release)): if target_type == 'content': target = target['sha1_git'] sample_release = { 'author': { 'email': b'author@company.org', 
'fullname': b'author ', 'name': b'author' }, 'date': { 'timestamp': int(datetime.now().timestamp()), 'offset': 0, 'negative_utc': False, }, 'id': hash_to_bytes(new_rel_id), 'message': b'sample release message', 'name': b'sample release', 'synthetic': False, 'target': hash_to_bytes(target), 'target_type': target_type } archive_data.release_add([sample_release]) url = reverse('api-1-release', url_args={'sha1_git': new_rel_id}) rv = api_client.get(url) expected_release = archive_data.release_get(new_rel_id) if target_type == 'content': url_args = {'q': 'sha1_git:%s' % target} else: url_args = {'sha1_git': target} target_url = reverse('api-1-%s' % target_type, - url_args=url_args) + url_args=url_args, + request=rv.wsgi_request) expected_release['target_url'] = target_url assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == expected_release def test_api_release_not_found(api_client): unknown_release_ = random_sha1() url = reverse('api-1-release', url_args={'sha1_git': unknown_release_}) rv = api_client.get(url) assert rv.status_code == 404, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == { 'exception': 'NotFoundExc', 'reason': 'Release with sha1_git %s not found.' 
% unknown_release_ } @given(release()) def test_api_release_uppercase(api_client, release): url = reverse('api-1-release-uppercase-checksum', url_args={'sha1_git': release.upper()}) resp = api_client.get(url) assert resp.status_code == 302 redirect_url = reverse('api-1-release-uppercase-checksum', url_args={'sha1_git': release}) assert resp['location'] == redirect_url diff --git a/swh/web/tests/api/views/test_revision.py b/swh/web/tests/api/views/test_revision.py index f8c84a2d..ab590333 100644 --- a/swh/web/tests/api/views/test_revision.py +++ b/swh/web/tests/api/views/test_revision.py @@ -1,271 +1,259 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from hypothesis import given +from swh.web.api.utils import enrich_revision from swh.web.common.exc import NotFoundExc - from swh.web.common.utils import reverse from swh.web.tests.data import random_sha1 from swh.web.tests.strategies import revision @given(revision()) def test_api_revision(api_client, archive_data, revision): url = reverse('api-1-revision', url_args={'sha1_git': revision}) rv = api_client.get(url) expected_revision = archive_data.revision_get(revision) - _enrich_revision(expected_revision) + enrich_revision(expected_revision, rv.wsgi_request) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == expected_revision def test_api_revision_not_found(api_client): unknown_revision_ = random_sha1() url = reverse('api-1-revision', url_args={'sha1_git': unknown_revision_}) rv = api_client.get(url) assert rv.status_code == 404, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == { 'exception': 'NotFoundExc', 'reason': 'Revision with sha1_git %s not found.' 
% unknown_revision_ } @given(revision()) def test_api_revision_raw_ok(api_client, archive_data, revision): url = reverse('api-1-revision-raw-message', url_args={'sha1_git': revision}) rv = api_client.get(url) expected_message = archive_data.revision_get(revision)['message'] assert rv.status_code == 200 assert rv['Content-Type'] == 'application/octet-stream' assert rv.content == expected_message.encode() def test_api_revision_raw_ko_no_rev(api_client): unknown_revision_ = random_sha1() url = reverse('api-1-revision-raw-message', url_args={'sha1_git': unknown_revision_}) rv = api_client.get(url) assert rv.status_code == 404, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == { 'exception': 'NotFoundExc', 'reason': 'Revision with sha1_git %s not found.' % unknown_revision_ } @given(revision()) def test_api_revision_log(api_client, archive_data, revision): per_page = 10 url = reverse('api-1-revision-log', url_args={'sha1_git': revision}, query_params={'per_page': per_page}) rv = api_client.get(url) expected_log = archive_data.revision_log(revision, limit=per_page+1) - expected_log = list(map(_enrich_revision, expected_log)) + expected_log = list(map(enrich_revision, expected_log, + [rv.wsgi_request] * len(expected_log))) has_next = len(expected_log) > per_page assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == (expected_log[:-1] if has_next else expected_log) if has_next: assert 'Link' in rv next_log_url = rv.wsgi_request.build_absolute_uri( reverse('api-1-revision-log', url_args={'sha1_git': expected_log[-1]['id']}, query_params={'per_page': per_page})) assert next_log_url in rv['Link'] def test_api_revision_log_not_found(api_client): unknown_revision_ = random_sha1() url = reverse('api-1-revision-log', url_args={'sha1_git': unknown_revision_}) rv = api_client.get(url) assert rv.status_code == 404, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == { 'exception': 
'NotFoundExc', 'reason': 'Revision with sha1_git %s not found.' % unknown_revision_ } assert not rv.has_header('Link') @given(revision()) def test_api_revision_log_context(api_client, archive_data, revision): revisions = archive_data.revision_log(revision, limit=4) prev_rev = revisions[0]['id'] rev = revisions[-1]['id'] per_page = 10 url = reverse('api-1-revision-log', url_args={'sha1_git': rev, 'prev_sha1s': prev_rev}, query_params={'per_page': per_page}) rv = api_client.get(url) expected_log = archive_data.revision_log(rev, limit=per_page) prev_revision = archive_data.revision_get(prev_rev) expected_log.insert(0, prev_revision) - expected_log = list(map(_enrich_revision, expected_log)) + expected_log = list(map(enrich_revision, expected_log, + [rv.wsgi_request] * len(expected_log))) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == expected_log def test_api_revision_directory_ko_not_found(api_client, mocker): mock_rev_dir = mocker.patch( 'swh.web.api.views.revision._revision_directory_by') mock_rev_dir.side_effect = NotFoundExc('Not found') rv = api_client.get('/api/1/revision/999/directory/some/path/to/dir/') assert rv.status_code == 404, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == { 'exception': 'NotFoundExc', 'reason': 'Not found' } mock_rev_dir.assert_called_once_with( {'sha1_git': '999'}, 'some/path/to/dir', '/api/1/revision/999/directory/some/path/to/dir/', with_data=False ) def test_api_revision_directory_ok_returns_dir_entries(api_client, mocker): mock_rev_dir = mocker.patch( 'swh.web.api.views.revision._revision_directory_by') stub_dir = { 'type': 'dir', 'revision': '999', 'content': [ { 'sha1_git': '789', 'type': 'file', 'target': '101', 'target_url': '/api/1/content/sha1_git:101/', 'name': 'somefile', 'file_url': '/api/1/revision/999/directory/some/path/' 'somefile/' }, { 'sha1_git': '123', 'type': 'dir', 'target': '456', 'target_url': '/api/1/directory/456/', 'name': 
'to-subdir', 'dir_url': '/api/1/revision/999/directory/some/path/' 'to-subdir/', } ] } mock_rev_dir.return_value = stub_dir rv = api_client.get('/api/1/revision/999/directory/some/path/') + stub_dir['content'][0]['target_url'] = rv.wsgi_request.build_absolute_uri( + stub_dir['content'][0]['target_url']) + stub_dir['content'][0]['file_url'] = rv.wsgi_request.build_absolute_uri( + stub_dir['content'][0]['file_url']) + stub_dir['content'][1]['target_url'] = rv.wsgi_request.build_absolute_uri( + stub_dir['content'][1]['target_url']) + stub_dir['content'][1]['dir_url'] = rv.wsgi_request.build_absolute_uri( + stub_dir['content'][1]['dir_url']) + assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == stub_dir mock_rev_dir.assert_called_once_with( {'sha1_git': '999'}, 'some/path', '/api/1/revision/999/directory/some/path/', with_data=False ) def test_api_revision_directory_ok_returns_content(api_client, mocker): mock_rev_dir = mocker.patch( 'swh.web.api.views.revision._revision_directory_by') stub_content = { 'type': 'file', 'revision': '999', 'content': { 'sha1_git': '789', 'sha1': '101', 'data_url': '/api/1/content/101/raw/', } } mock_rev_dir.return_value = stub_content url = '/api/1/revision/666/directory/some/other/path/' rv = api_client.get(url) + stub_content['content']['data_url'] = rv.wsgi_request.build_absolute_uri( + stub_content['content']['data_url']) + assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == stub_content mock_rev_dir.assert_called_once_with( {'sha1_git': '666'}, 'some/other/path', url, with_data=False) @given(revision()) def test_api_revision_uppercase(api_client, revision): url = reverse('api-1-revision-uppercase-checksum', url_args={'sha1_git': revision.upper()}) resp = api_client.get(url) assert resp.status_code == 302 redirect_url = reverse('api-1-revision', url_args={'sha1_git': revision}) assert resp['location'] == redirect_url - - -def 
_enrich_revision(revision): - directory_url = reverse( - 'api-1-directory', - url_args={'sha1_git': revision['directory']}) - - history_url = reverse('api-1-revision-log', - url_args={'sha1_git': revision['id']}) - - parents_id_url = [] - for p in revision['parents']: - parents_id_url.append({ - 'id': p, - 'url': reverse('api-1-revision', url_args={'sha1_git': p}) - }) - - revision_url = reverse('api-1-revision', - url_args={'sha1_git': revision['id']}) - - revision['directory_url'] = directory_url - revision['history_url'] = history_url - revision['url'] = revision_url - revision['parents'] = parents_id_url - - return revision diff --git a/swh/web/tests/api/views/test_snapshot.py b/swh/web/tests/api/views/test_snapshot.py index bb6ccf05..ea2ea81b 100644 --- a/swh/web/tests/api/views/test_snapshot.py +++ b/swh/web/tests/api/views/test_snapshot.py @@ -1,194 +1,162 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import random from hypothesis import given from swh.model.hashutil import hash_to_hex +from swh.web.api.utils import enrich_snapshot from swh.web.common.utils import reverse from swh.web.tests.data import random_sha1 from swh.web.tests.strategies import ( snapshot, new_snapshot ) @given(snapshot()) def test_api_snapshot(api_client, archive_data, snapshot): url = reverse('api-1-snapshot', url_args={'snapshot_id': snapshot}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' expected_data = archive_data.snapshot_get(snapshot) - expected_data = _enrich_snapshot(archive_data, expected_data) + expected_data = enrich_snapshot(expected_data, rv.wsgi_request) assert rv.data == expected_data @given(snapshot()) def test_api_snapshot_paginated(api_client, archive_data, snapshot): branches_offset = 0 
branches_count = 2 snapshot_branches = [] for k, v in sorted( archive_data.snapshot_get(snapshot)['branches'].items()): snapshot_branches.append({ 'name': k, 'target_type': v['target_type'], 'target': v['target'] }) whole_snapshot = {'id': snapshot, 'branches': {}, 'next_branch': None} while branches_offset < len(snapshot_branches): branches_from = snapshot_branches[branches_offset]['name'] url = reverse('api-1-snapshot', url_args={'snapshot_id': snapshot}, query_params={'branches_from': branches_from, 'branches_count': branches_count}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' expected_data = archive_data.snapshot_get_branches( snapshot, branches_from, branches_count) - expected_data = _enrich_snapshot(archive_data, expected_data) + expected_data = enrich_snapshot(expected_data, rv.wsgi_request) branches_offset += branches_count if branches_offset < len(snapshot_branches): next_branch = snapshot_branches[branches_offset]['name'] expected_data['next_branch'] = next_branch else: expected_data['next_branch'] = None assert rv.data == expected_data whole_snapshot['branches'].update(expected_data['branches']) if branches_offset < len(snapshot_branches): next_url = rv.wsgi_request.build_absolute_uri( reverse('api-1-snapshot', url_args={'snapshot_id': snapshot}, query_params={'branches_from': next_branch, 'branches_count': branches_count})) assert rv['Link'] == '<%s>; rel="next"' % next_url else: assert not rv.has_header('Link') url = reverse('api-1-snapshot', url_args={'snapshot_id': snapshot}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == whole_snapshot @given(snapshot()) def test_api_snapshot_filtered(api_client, archive_data, snapshot): snapshot_branches = [] for k, v in sorted( archive_data.snapshot_get(snapshot)['branches'].items()): snapshot_branches.append({ 'name': k, 'target_type': v['target_type'], 'target': 
v['target'] }) target_type = random.choice(snapshot_branches)['target_type'] url = reverse('api-1-snapshot', url_args={'snapshot_id': snapshot}, query_params={'target_types': target_type}) rv = api_client.get(url) expected_data = archive_data.snapshot_get_branches( snapshot, target_types=target_type) - expected_data = _enrich_snapshot(archive_data, expected_data) + expected_data = enrich_snapshot(expected_data, rv.wsgi_request) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data == expected_data def test_api_snapshot_errors(api_client): unknown_snapshot_ = random_sha1() url = reverse('api-1-snapshot', url_args={'snapshot_id': '63ce369'}) rv = api_client.get(url) assert rv.status_code == 400, rv.data url = reverse('api-1-snapshot', url_args={'snapshot_id': unknown_snapshot_}) rv = api_client.get(url) assert rv.status_code == 404, rv.data @given(snapshot()) def test_api_snapshot_uppercase(api_client, snapshot): url = reverse('api-1-snapshot-uppercase-checksum', url_args={'snapshot_id': snapshot.upper()}) resp = api_client.get(url) assert resp.status_code == 302 redirect_url = reverse('api-1-snapshot-uppercase-checksum', url_args={'snapshot_id': snapshot}) assert resp['location'] == redirect_url @given(new_snapshot(min_size=4)) def test_api_snapshot_null_branch(api_client, archive_data, new_snapshot): snp_dict = new_snapshot.to_dict() snp_id = hash_to_hex(snp_dict['id']) for branch in snp_dict['branches'].keys(): snp_dict['branches'][branch] = None break archive_data.snapshot_add([snp_dict]) url = reverse('api-1-snapshot', url_args={'snapshot_id': snp_id}) rv = api_client.get(url) assert rv.status_code == 200, rv.data - - -def _enrich_snapshot(archive_data, snapshot): - def _get_branch_url(target_type, target): - url = None - if target_type == 'revision': - url = reverse('api-1-revision', url_args={'sha1_git': target}) - if target_type == 'release': - url = reverse('api-1-release', url_args={'sha1_git': target}) - 
return url - - for branch in snapshot['branches'].keys(): - target = snapshot['branches'][branch]['target'] - target_type = snapshot['branches'][branch]['target_type'] - snapshot['branches'][branch]['target_url'] = \ - _get_branch_url(target_type, target) - for branch in snapshot['branches'].keys(): - target = snapshot['branches'][branch]['target'] - target_type = snapshot['branches'][branch]['target_type'] - if target_type == 'alias': - if target in snapshot['branches']: - snapshot['branches'][branch]['target_url'] = \ - snapshot['branches'][target]['target_url'] - else: - snp = archive_data.snapshot_get_branches(snapshot['id'], - branches_from=target, - branches_count=1) - alias_target = snp['branches'][target]['target'] - alias_target_type = snp['branches'][target]['target_type'] - snapshot['branches'][branch]['target_url'] = \ - _get_branch_url(alias_target_type, alias_target) - - return snapshot diff --git a/swh/web/tests/api/views/test_vault.py b/swh/web/tests/api/views/test_vault.py index 1782ceb4..74056d60 100644 --- a/swh/web/tests/api/views/test_vault.py +++ b/swh/web/tests/api/views/test_vault.py @@ -1,118 +1,115 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.model import hashutil +from swh.web.common.utils import reverse TEST_OBJ_ID = 'd4905454cc154b492bd6afed48694ae3c579345e' -OBJECT_TYPES = {'directory': ('directory', None), - 'revision_gitfast': ('revision', 'gitfast')} +OBJECT_TYPES = ('directory', 'revision_gitfast') def test_api_vault_cook(api_client, mocker): mock_service = mocker.patch('swh.web.api.views.vault.service') - stub_cook = { - 'fetch_url': ('http://127.0.0.1:5004/api/1/vault/directory/{}/raw/' - .format(TEST_OBJ_ID)), - 'obj_id': TEST_OBJ_ID, - 'obj_type': 'test_type', - 'progress_message': None, - 'status': 'done', 
- 'task_uuid': 'de75c902-5ee5-4739-996e-448376a93eff', - } - stub_fetch = b'content' - - mock_service.vault_cook.return_value = stub_cook - mock_service.vault_fetch.return_value = stub_fetch - - for obj_type, (obj_type_name, obj_type_format) in OBJECT_TYPES.items(): - url = '/api/1/vault/{}/{}/'.format(obj_type_name, TEST_OBJ_ID) - if obj_type_format: - url += '{}/'.format(obj_type_format) + + for obj_type in OBJECT_TYPES: + + fetch_url = reverse(f'api-1-vault-fetch-{obj_type}', + url_args={f'{obj_type[:3]}_id': TEST_OBJ_ID}) + stub_cook = { + 'fetch_url': fetch_url, + 'obj_id': TEST_OBJ_ID, + 'obj_type': obj_type, + 'progress_message': None, + 'status': 'done', + 'task_uuid': 'de75c902-5ee5-4739-996e-448376a93eff', + } + stub_fetch = b'content' + + mock_service.vault_cook.return_value = stub_cook + mock_service.vault_fetch.return_value = stub_fetch + + url = reverse(f'api-1-vault-cook-{obj_type}', + url_args={f'{obj_type[:3]}_id': TEST_OBJ_ID}) + rv = api_client.post(url, {'email': 'test@test.mail'}) assert rv.status_code == 200, rv.data assert rv['Content-Type'] == 'application/json' + stub_cook['fetch_url'] = rv.wsgi_request.build_absolute_uri( + stub_cook['fetch_url']) + assert rv.data == stub_cook mock_service.vault_cook.assert_called_with( obj_type, hashutil.hash_to_bytes(TEST_OBJ_ID), 'test@test.mail') - rv = api_client.get(url + 'raw/') + rv = api_client.get(fetch_url) assert rv.status_code == 200 assert rv['Content-Type'] == 'application/gzip' assert rv.content == stub_fetch mock_service.vault_fetch.assert_called_with( obj_type, hashutil.hash_to_bytes(TEST_OBJ_ID)) -def test_api_vault_cook_uppercase_hash(api_client, mocker): - mock_service = mocker.patch('swh.web.api.views.vault.service') - stub_cook = { - 'fetch_url': ('http://127.0.0.1:5004/api/1/vault/directory/{}/raw/' - .format(TEST_OBJ_ID.upper())), - 'obj_id': TEST_OBJ_ID.upper(), - 'obj_type': 'test_type', - 'progress_message': None, - 'status': 'done', - 'task_uuid': 
'de75c902-5ee5-4739-996e-448376a93eff', - } - stub_fetch = b'content' - - mock_service.vault_cook.return_value = stub_cook - mock_service.vault_fetch.return_value = stub_fetch - - for obj_type, (obj_type_name, obj_type_format) in OBJECT_TYPES.items(): - url = '/api/1/vault/{}/{}/'.format(obj_type_name, TEST_OBJ_ID) - if obj_type_format: - url += '{}/'.format(obj_type_format) +def test_api_vault_cook_uppercase_hash(api_client): + + for obj_type in OBJECT_TYPES: + + url = reverse(f'api-1-vault-cook-{obj_type}-uppercase-checksum', + url_args={f'{obj_type[:3]}_id': TEST_OBJ_ID.upper()}) rv = api_client.post(url, {'email': 'test@test.mail'}) - assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv.status_code == 302 - assert rv.data == stub_cook - mock_service.vault_cook.assert_called_with( - obj_type, - hashutil.hash_to_bytes(TEST_OBJ_ID), - 'test@test.mail') + redirect_url = reverse(f'api-1-vault-cook-{obj_type}', + url_args={f'{obj_type[:3]}_id': TEST_OBJ_ID}) - rv = api_client.get(url + 'raw/') + assert rv['location'] == redirect_url - assert rv.status_code == 200 - assert rv['Content-Type'] == 'application/gzip' - assert rv.content == stub_fetch - mock_service.vault_fetch.assert_called_with( - obj_type, hashutil.hash_to_bytes(TEST_OBJ_ID)) + fetch_url = reverse( + f'api-1-vault-fetch-{obj_type}-uppercase-checksum', + url_args={f'{obj_type[:3]}_id': TEST_OBJ_ID.upper()}) + + rv = api_client.get(fetch_url) + + assert rv.status_code == 302 + + redirect_url = reverse(f'api-1-vault-fetch-{obj_type}', + url_args={f'{obj_type[:3]}_id': TEST_OBJ_ID}) + + assert rv['location'] == redirect_url def test_api_vault_cook_notfound(api_client, mocker): mock_service = mocker.patch('swh.web.api.views.vault.service') mock_service.vault_cook.return_value = None mock_service.vault_fetch.return_value = None - for obj_type, (obj_type_name, obj_type_format) in OBJECT_TYPES.items(): - url = '/api/1/vault/{}/{}/'.format(obj_type_name, 
TEST_OBJ_ID) - if obj_type_format: - url += '{}/'.format(obj_type_format) + for obj_type in OBJECT_TYPES: + url = reverse(f'api-1-vault-cook-{obj_type}', + url_args={f'{obj_type[:3]}_id': TEST_OBJ_ID}) rv = api_client.post(url) assert rv.status_code == 404, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data['exception'] == 'NotFoundExc' mock_service.vault_cook.assert_called_with( obj_type, hashutil.hash_to_bytes(TEST_OBJ_ID), None) - rv = api_client.get(url + 'raw/') + fetch_url = reverse(f'api-1-vault-fetch-{obj_type}', + url_args={f'{obj_type[:3]}_id': TEST_OBJ_ID}) + + rv = api_client.get(fetch_url) assert rv.status_code == 404, rv.data assert rv['Content-Type'] == 'application/json' assert rv.data['exception'] == 'NotFoundExc' mock_service.vault_fetch.assert_called_with( obj_type, hashutil.hash_to_bytes(TEST_OBJ_ID)) diff --git a/swh/web/tests/common/test_templatetags.py b/swh/web/tests/common/test_templatetags.py index 63c574df..e039dd9f 100644 --- a/swh/web/tests/common/test_templatetags.py +++ b/swh/web/tests/common/test_templatetags.py @@ -1,62 +1,64 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from swh.web.common.swh_templatetags import ( urlize_links_and_mails, urlize_header_links, safe_docstring_display ) -def test_urlize_api_links_api(): - # update api link with html links content with links - content = '{"url": "/api/1/abc/"}' - expected_content = ('{"url": "/api/1/abc/"}') +def test_urlize_http_link(): + link = 'https://example.com/api/1/abc/' + expected_content = f'{link}' + + assert urlize_links_and_mails(link) == expected_content - assert urlize_links_and_mails(content) == expected_content +def test_urlize_email(): + email = 'someone@example.com' + expected_content = f'{email}' -def 
test_urlize_api_links_browse(): - # update /browse link with html links content with links - content = '{"url": "/browse/def/"}' - expected_content = ('{"url": "' - '/browse/def/"}') - assert urlize_links_and_mails(content) == expected_content + assert urlize_links_and_mails(email) == expected_content def test_urlize_header_links(): - # update api link with html links content with links - content = '; rel="next"\n; rel="prev"' - expected_content = ('</api/1/abc/>; rel="next"\n' - '</api/1/def/>; rel="prev"') + next_link = 'https://example.com/api/1/abc/' + prev_link = 'https://example.com/api/1/def/' + + content = f'<{next_link}>; rel="next"\n<{prev_link}>; rel="prev"' + + expected_content = ( + f'<{next_link}>; rel="next"\n' + f'<{prev_link}>; rel="prev"') assert urlize_header_links(content) == expected_content # remove deprecation warnings related to docutils @pytest.mark.filterwarnings( 'ignore:.*U.*mode is deprecated:DeprecationWarning') def test_safe_docstring_display(): # update api link with html links content with links docstring = ( 'This is my list header:\n\n' ' - Here is item 1, with a continuation\n' ' line right here\n' ' - Here is item 2\n\n' ' Here is something that is not part of the list' ) expected_docstring = ( '

This is my list header:

\n' '
    \n' '
  • Here is item 1, with a continuation\n' 'line right here
  • \n' '
  • Here is item 2
  • \n' '
\n' '

Here is something that is not part of the list

\n' ) assert safe_docstring_display(docstring) == expected_docstring diff --git a/swh/web/tests/conftest.py b/swh/web/tests/conftest.py index 28d621b5..65b40e51 100644 --- a/swh/web/tests/conftest.py +++ b/swh/web/tests/conftest.py @@ -1,296 +1,301 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import inspect import json import os import shutil from subprocess import run, PIPE import pytest from django.core.cache import cache from hypothesis import settings, HealthCheck from rest_framework.test import APIClient, APIRequestFactory from swh.model.hashutil import ALGORITHMS, hash_to_bytes from swh.web.common import converters from swh.web.tests.data import get_tests_data, override_storages # Used to skip some tests ctags_json_missing = ( shutil.which('ctags') is None or b'+json' not in run(['ctags', '--version'], stdout=PIPE).stdout ) fossology_missing = shutil.which('nomossa') is None # Register some hypothesis profiles settings.register_profile('default', settings()) settings.register_profile( 'swh-web', settings(deadline=None, suppress_health_check=[HealthCheck.too_slow, HealthCheck.filter_too_much])) settings.register_profile( 'swh-web-fast', settings(deadline=None, max_examples=1, suppress_health_check=[HealthCheck.too_slow, HealthCheck.filter_too_much])) def pytest_configure(config): # Small hack in order to be able to run the unit tests # without static assets generated by webpack. # Those assets are not really needed for the Python tests # but the django templates will fail to load due to missing # generated file webpack-stats.json describing the js and css # files to include. # So generate a dummy webpack-stats.json file to overcome # that issue. 
test_dir = os.path.dirname(__file__) static_dir = os.path.join(test_dir, '../static') webpack_stats = os.path.join(static_dir, 'webpack-stats.json') if os.path.exists(webpack_stats): return bundles_dir = os.path.join(test_dir, '../assets/src/bundles') _, dirs, _ = next(os.walk(bundles_dir)) mock_webpack_stats = { 'status': 'done', 'publicPath': '/static', 'chunks': {} } for bundle in dirs: asset = 'js/%s.js' % bundle mock_webpack_stats['chunks'][bundle] = [{ 'name': asset, 'publicPath': '/static/%s' % asset, 'path': os.path.join(static_dir, asset) }] with open(webpack_stats, 'w') as outfile: json.dump(mock_webpack_stats, outfile) # Clear Django cache before each test @pytest.fixture(autouse=True) def django_cache_cleared(): cache.clear() +# Alias rf fixture from pytest-django +@pytest.fixture +def request_factory(rf): + return rf + # Fixture to get test client from Django REST Framework @pytest.fixture(scope='module') def api_client(): return APIClient() # Fixture to get API request factory from Django REST Framework @pytest.fixture(scope='module') def api_request_factory(): return APIRequestFactory() # Initialize tests data @pytest.fixture(autouse=True) def tests_data(): data = get_tests_data(reset=True) # Update swh-web configuration to use the in-memory storages # instantiated in the tests.data module override_storages(data['storage'], data['idx_storage']) return data # Fixture to manipulate data from a sample archive used in the tests @pytest.fixture def archive_data(tests_data): return _ArchiveData(tests_data) # Fixture to manipulate indexer data from a sample archive used in the tests @pytest.fixture def indexer_data(tests_data): return _IndexerData(tests_data) # Custom data directory for requests_mock @pytest.fixture def datadir(): return os.path.join(os.path.abspath(os.path.dirname(__file__)), 'resources') class _ArchiveData: """ Helper class to manage data from a sample test archive. 
It is initialized with a reference to an in-memory storage containing raw tests data. It is basically a proxy to Storage interface but it overrides some methods to retrieve those tests data in a json serializable format in order to ease tests implementation. """ def __init__(self, tests_data): self.storage = tests_data['storage'] def _call_storage_method(method): def call_storage_method(*args, **kwargs): return method(*args, **kwargs) return call_storage_method # Forward calls to non overridden Storage methods to wrapped # storage instance for method_name, method in inspect.getmembers( self.storage, predicate=inspect.ismethod): if (not hasattr(self, method_name) and not method_name.startswith('_')): setattr(self, method_name, _call_storage_method(method)) def content_find(self, content): cnt_ids_bytes = {algo_hash: hash_to_bytes(content[algo_hash]) for algo_hash in ALGORITHMS if content.get(algo_hash)} cnt = self.storage.content_find(cnt_ids_bytes) return converters.from_content(cnt[0]) if cnt else cnt def content_get_metadata(self, cnt_id): cnt_id_bytes = hash_to_bytes(cnt_id) metadata = next(self.storage.content_get_metadata([cnt_id_bytes])) return converters.from_swh(metadata, hashess={'sha1', 'sha1_git', 'sha256', 'blake2s256'}) def content_get(self, cnt_id): cnt_id_bytes = hash_to_bytes(cnt_id) cnt = next(self.storage.content_get([cnt_id_bytes])) return converters.from_content(cnt) def directory_get(self, dir_id): return { 'id': dir_id, 'content': self.directory_ls(dir_id) } def directory_ls(self, dir_id): cnt_id_bytes = hash_to_bytes(dir_id) dir_content = map(converters.from_directory_entry, self.storage.directory_ls(cnt_id_bytes)) return list(dir_content) def release_get(self, rel_id): rel_id_bytes = hash_to_bytes(rel_id) rel_data = next(self.storage.release_get([rel_id_bytes])) return converters.from_release(rel_data) def revision_get(self, rev_id): rev_id_bytes = hash_to_bytes(rev_id) rev_data = next(self.storage.revision_get([rev_id_bytes])) return 
converters.from_revision(rev_data) def revision_log(self, rev_id, limit=None): rev_id_bytes = hash_to_bytes(rev_id) return list(map(converters.from_revision, self.storage.revision_log([rev_id_bytes], limit=limit))) def snapshot_get_latest(self, origin_url): snp = self.storage.snapshot_get_latest(origin_url) return converters.from_snapshot(snp) def origin_get(self, origin_info): origin = self.storage.origin_get(origin_info) return converters.from_origin(origin) def origin_visit_get(self, origin_url): visits = self.storage.origin_visit_get(origin_url) return list(map(converters.from_origin_visit, visits)) def origin_visit_get_by(self, origin_url, visit_id): visit = self.storage.origin_visit_get_by(origin_url, visit_id) return converters.from_origin_visit(visit) def snapshot_get(self, snapshot_id): snp = self.storage.snapshot_get(hash_to_bytes(snapshot_id)) return converters.from_snapshot(snp) def snapshot_get_branches(self, snapshot_id, branches_from='', branches_count=1000, target_types=None): snp = self.storage.snapshot_get_branches( hash_to_bytes(snapshot_id), branches_from.encode(), branches_count, target_types) return converters.from_snapshot(snp) def snapshot_get_head(self, snapshot): if snapshot['branches']['HEAD']['target_type'] == 'alias': target = snapshot['branches']['HEAD']['target'] head = snapshot['branches'][target]['target'] else: head = snapshot['branches']['HEAD']['target'] return head class _IndexerData: """ Helper class to manage indexer tests data It is initialized with a reference to an in-memory indexer storage containing raw tests data. It also defines class methods to retrieve those tests data in a json serializable format in order to ease tests implementation. 
""" def __init__(self, tests_data): self.idx_storage = tests_data['idx_storage'] self.mimetype_indexer = tests_data['mimetype_indexer'] self.license_indexer = tests_data['license_indexer'] self.ctags_indexer = tests_data['ctags_indexer'] def content_add_mimetype(self, cnt_id): self.mimetype_indexer.run([hash_to_bytes(cnt_id)], 'update-dups') def content_get_mimetype(self, cnt_id): mimetype = next(self.idx_storage.content_mimetype_get( [hash_to_bytes(cnt_id)])) return converters.from_filetype(mimetype) def content_add_language(self, cnt_id): raise NotImplementedError('Language indexer is disabled.') self.language_indexer.run([hash_to_bytes(cnt_id)], 'update-dups') def content_get_language(self, cnt_id): lang = next(self.idx_storage.content_language_get( [hash_to_bytes(cnt_id)])) return converters.from_swh(lang, hashess={'id'}) def content_add_license(self, cnt_id): self.license_indexer.run([hash_to_bytes(cnt_id)], 'update-dups') def content_get_license(self, cnt_id): cnt_id_bytes = hash_to_bytes(cnt_id) lic = next(self.idx_storage.content_fossology_license_get( [cnt_id_bytes])) return converters.from_swh({'id': cnt_id_bytes, 'facts': lic[cnt_id_bytes]}, hashess={'id'}) def content_add_ctags(self, cnt_id): self.ctags_indexer.run([hash_to_bytes(cnt_id)], 'update-dups') def content_get_ctags(self, cnt_id): cnt_id_bytes = hash_to_bytes(cnt_id) ctags = self.idx_storage.content_ctags_get([cnt_id_bytes]) for ctag in ctags: yield converters.from_swh(ctag, hashess={'id'})